Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f46fcfe

Browse files
committedAug 26, 2024
Reduce code duplication by moving partition_lomuto_branchless_simple into quicksort module
1 parent 1805c29 commit f46fcfe

File tree

3 files changed

+39
-79
lines changed

3 files changed

+39
-79
lines changed
 

‎core/src/slice/sort/select.rs

+1-77
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,11 @@
66
//! for pivot selection. Using this as a fallback ensures O(n) worst case running time with
77
//! better performance than one would get using heapsort as fallback.
88
9-
use crate::intrinsics;
109
use crate::mem::{self, SizedTypeProperties};
1110
#[cfg(not(feature = "optimize_for_size"))]
1211
use crate::slice::sort::shared::pivot::choose_pivot;
1312
#[cfg(not(feature = "optimize_for_size"))]
1413
use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
15-
#[cfg(not(feature = "optimize_for_size"))]
1614
use crate::slice::sort::unstable::quicksort::partition;
1715

1816
/// Reorders the slice such that the element at `index` is at its final sorted position.
@@ -252,15 +250,7 @@ fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F)
252250

253251
median_of_medians(&mut v[lo..lo + frac], is_less, pivot);
254252

255-
#[cfg(not(feature = "optimize_for_size"))]
256-
{
257-
partition(v, lo + pivot, is_less)
258-
}
259-
260-
#[cfg(feature = "optimize_for_size")]
261-
{
262-
partition_size_opt(v, lo + pivot, is_less)
263-
}
253+
partition(v, lo + pivot, is_less)
264254
}
265255

266256
/// Moves around the 9 elements at the indices a..i, such that
@@ -351,69 +341,3 @@ fn bubble_sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
351341
n -= 1;
352342
}
353343
}
354-
355-
#[cfg(feature = "optimize_for_size")]
356-
fn partition_size_opt<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
357-
where
358-
F: FnMut(&T, &T) -> bool,
359-
{
360-
let len = v.len();
361-
362-
// Allows for panic-free code-gen by proving this property to the compiler.
363-
if len == 0 {
364-
return 0;
365-
}
366-
367-
if pivot >= len {
368-
intrinsics::abort();
369-
}
370-
371-
// SAFETY: We checked that `pivot` is in-bounds.
372-
unsafe {
373-
// Place the pivot at the beginning of slice.
374-
v.swap_unchecked(0, pivot);
375-
}
376-
let (pivot, v_without_pivot) = v.split_at_mut(1);
377-
378-
// Assuming that Rust generates noalias LLVM IR we can be sure that a partition function
379-
// signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't alias.
380-
// Having this guarantee is crucial for optimizations. It's possible to copy the pivot value
381-
// into a stack value, but this creates issues for types with interior mutability mandating
382-
// a drop guard.
383-
let pivot = &mut pivot[0];
384-
385-
let num_lt = partition_lomuto_branchless_simple(v_without_pivot, pivot, is_less);
386-
387-
if num_lt >= len {
388-
intrinsics::abort();
389-
}
390-
391-
// SAFETY: We checked that `num_lt` is in-bounds.
392-
unsafe {
393-
// Place the pivot between the two partitions.
394-
v.swap_unchecked(0, num_lt);
395-
}
396-
397-
num_lt
398-
}
399-
400-
#[cfg(feature = "optimize_for_size")]
401-
fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
402-
v: &mut [T],
403-
pivot: &T,
404-
is_less: &mut F,
405-
) -> usize {
406-
let mut left = 0;
407-
408-
for right in 0..v.len() {
409-
// SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that
410-
// left <= right and that both are in-bounds.
411-
unsafe {
412-
let right_is_lt = is_less(v.get_unchecked(right), pivot);
413-
v.swap_unchecked(left, right);
414-
left += right_is_lt as usize;
415-
}
416-
}
417-
418-
left
419-
}

‎core/src/slice/sort/unstable/mod.rs

-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use crate::slice::sort::shared::find_existing_run;
88
use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
99

1010
pub(crate) mod heapsort;
11-
#[cfg(not(feature = "optimize_for_size"))]
1211
pub(crate) mod quicksort;
1312

1413
/// Unstable sort called ipnsort by Lukas Bergdoll and Orson Peters.

‎core/src/slice/sort/unstable/quicksort.rs

+38-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
//! This module contains an unstable quicksort and two partition implementations.
22
33
use crate::mem::{self, ManuallyDrop};
4+
#[cfg(not(feature = "optimize_for_size"))]
45
use crate::slice::sort::shared::pivot::choose_pivot;
6+
#[cfg(not(feature = "optimize_for_size"))]
57
use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl;
68
use crate::{intrinsics, ptr};
79

@@ -11,6 +13,7 @@ use crate::{intrinsics, ptr};
1113
///
1214
/// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
1315
/// this function will immediately switch to heapsort.
16+
#[cfg(not(feature = "optimize_for_size"))]
1417
pub(crate) fn quicksort<'a, T, F>(
1518
mut v: &'a mut [T],
1619
mut ancestor_pivot: Option<&'a T>,
@@ -138,7 +141,16 @@ const fn inst_partition<T, F: FnMut(&T, &T) -> bool>() -> fn(&mut [T], &T, &mut
138141
if mem::size_of::<T>() <= MAX_BRANCHLESS_PARTITION_SIZE {
139142
// Specialize for types that are relatively cheap to copy, where branchless optimizations
140143
// have large leverage e.g. `u64` and `String`.
141-
partition_lomuto_branchless_cyclic::<T, F>
144+
145+
#[cfg(not(feature = "optimize_for_size"))]
146+
{
147+
partition_lomuto_branchless_cyclic::<T, F>
148+
}
149+
150+
#[cfg(feature = "optimize_for_size")]
151+
{
152+
partition_lomuto_branchless_simple::<T, F>
153+
}
142154
} else {
143155
partition_hoare_branchy_cyclic::<T, F>
144156
}
@@ -224,6 +236,7 @@ where
224236
}
225237
}
226238

239+
#[cfg(not(feature = "optimize_for_size"))]
227240
struct PartitionState<T> {
228241
// The current element that is being looked at, scans left to right through slice.
229242
right: *mut T,
@@ -234,6 +247,7 @@ struct PartitionState<T> {
234247
gap: GapGuardRaw<T>,
235248
}
236249

250+
#[cfg(not(feature = "optimize_for_size"))]
237251
fn partition_lomuto_branchless_cyclic<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
238252
where
239253
F: FnMut(&T, &T) -> bool,
@@ -325,6 +339,27 @@ where
325339
}
326340
}
327341

342+
#[cfg(feature = "optimize_for_size")]
343+
fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
344+
v: &mut [T],
345+
pivot: &T,
346+
is_less: &mut F,
347+
) -> usize {
348+
let mut left = 0;
349+
350+
for right in 0..v.len() {
351+
// SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that
352+
// left <= right and that both are in-bounds.
353+
unsafe {
354+
let right_is_lt = is_less(v.get_unchecked(right), pivot);
355+
v.swap_unchecked(left, right);
356+
left += right_is_lt as usize;
357+
}
358+
}
359+
360+
left
361+
}
362+
328363
struct GapGuard<T> {
329364
pos: *mut T,
330365
value: ManuallyDrop<T>,
@@ -342,11 +377,13 @@ impl<T> Drop for GapGuard<T> {
342377

343378
/// Ideally this wouldn't be needed and we could just use the regular GapGuard.
344379
/// See comment in [`partition_lomuto_branchless_cyclic`].
380+
#[cfg(not(feature = "optimize_for_size"))]
345381
struct GapGuardRaw<T> {
346382
pos: *mut T,
347383
value: *mut T,
348384
}
349385

386+
#[cfg(not(feature = "optimize_for_size"))]
350387
impl<T> Drop for GapGuardRaw<T> {
351388
fn drop(&mut self) {
352389
// SAFETY: `self` MUST be constructed in a way that makes copying the gap value into

0 commit comments

Comments
 (0)
Please sign in to comment.