Skip to content

Commit e2223c9

Browse files
committed
Auto merge of #68827 - ssomers:btree_navigation_revisited, r=Mark-Simulacrum
BTreeMap navigation done safer & faster

It turns out that there was a faster way to do the tree navigation code bundled in #67073, by moving from edge to KV and from KV to next edge separately. It extracts most of the code as safe functions, and contains the duplication of handles within the short wrapper functions.

This somehow hits a sweet spot in the compiler because it reports boosts across the board:

```
>cargo benchcmp pre3.txt posz4.txt --threshold 5
name                                           pre3.txt ns/iter  posz4.txt ns/iter  diff ns/iter  diff %   speedup
btree::map::first_and_last_0                   40                37                 -3            -7.50%   x 1.08
btree::map::first_and_last_100                 58                44                 -14           -24.14%  x 1.32
btree::map::iter_1000                          8,920             3,419              -5,501        -61.67%  x 2.61
btree::map::iter_100000                        1,069,290         411,615            -657,675      -61.51%  x 2.60
btree::map::iter_20                            169               58                 -111          -65.68%  x 2.91
btree::map::iter_mut_1000                      8,701             3,303              -5,398        -62.04%  x 2.63
btree::map::iter_mut_100000                    1,034,560         405,975            -628,585      -60.76%  x 2.55
btree::map::iter_mut_20                        165               58                 -107          -64.85%  x 2.84
btree::set::clone_100                          1,831             1,562              -269          -14.69%  x 1.17
btree::set::clone_100_and_clear                1,831             1,565              -266          -14.53%  x 1.17
btree::set::clone_100_and_into_iter            1,917             1,541              -376          -19.61%  x 1.24
btree::set::clone_100_and_pop_all              2,609             2,441              -168          -6.44%   x 1.07
btree::set::clone_100_and_remove_all           4,598             3,927              -671          -14.59%  x 1.17
btree::set::clone_100_and_remove_half          2,765             2,551              -214          -7.74%   x 1.08
btree::set::clone_10k                          191,610           164,616            -26,994       -14.09%  x 1.16
btree::set::clone_10k_and_clear                192,003           164,616            -27,387       -14.26%  x 1.17
btree::set::clone_10k_and_into_iter            200,037           163,010            -37,027       -18.51%  x 1.23
btree::set::clone_10k_and_pop_all              267,023           250,913            -16,110       -6.03%   x 1.06
btree::set::clone_10k_and_remove_all           536,230           464,100            -72,130       -13.45%  x 1.16
btree::set::clone_10k_and_remove_half          453,350           430,545            -22,805       -5.03%   x 1.05
btree::set::difference_random_100_vs_100       1,787             801                -986          -55.18%  x 2.23
btree::set::difference_random_100_vs_10k       2,978             2,696              -282          -9.47%   x 1.10
btree::set::difference_random_10k_vs_100       111,075           54,734             -56,341       -50.72%  x 2.03
btree::set::difference_random_10k_vs_10k       246,380           175,980            -70,400       -28.57%  x 1.40
btree::set::difference_staggered_100_vs_100    1,789             951                -838          -46.84%  x 1.88
btree::set::difference_staggered_100_vs_10k    2,798             2,606              -192          -6.86%   x 1.07
btree::set::difference_staggered_10k_vs_10k    176,452           97,401             -79,051       -44.80%  x 1.81
btree::set::intersection_100_neg_vs_10k_pos    34                32                 -2            -5.88%   x 1.06
btree::set::intersection_100_pos_vs_100_neg    30                27                 -3            -10.00%  x 1.11
btree::set::intersection_random_100_vs_100     1,537             613                -924          -60.12%  x 2.51
btree::set::intersection_random_100_vs_10k     2,793             2,649              -144          -5.16%   x 1.05
btree::set::intersection_random_10k_vs_10k     222,127           147,166            -74,961       -33.75%  x 1.51
btree::set::intersection_staggered_100_vs_100  1,447             622                -825          -57.01%  x 2.33
btree::set::intersection_staggered_100_vs_10k  2,606             2,382              -224          -8.60%   x 1.09
btree::set::intersection_staggered_10k_vs_10k  143,620           58,790             -84,830       -59.07%  x 2.44
btree::set::is_subset_100_vs_100               1,349             488                -861          -63.83%  x 2.76
btree::set::is_subset_100_vs_10k               1,720             1,428              -292          -16.98%  x 1.20
btree::set::is_subset_10k_vs_10k               135,984           48,527             -87,457       -64.31%  x 2.80
```

The `first_and_last` ones are noise (they don't do iteration), the others seem genuine. As always, approved by Miri.

Also, a separate commit with some more benchmarks of mutable behaviour (which also benefit).

r? @cuviper
2 parents bfc32dd + 9f7b58f commit e2223c9

File tree

4 files changed

+233
-166
lines changed

4 files changed

+233
-166
lines changed

src/liballoc/benches/btree/set.rs

+85-16
Original file line numberDiff line numberDiff line change
@@ -50,43 +50,112 @@ macro_rules! set_bench {
5050
};
5151
}
5252

53-
const BUILD_SET_SIZE: usize = 100;
53+
#[bench]
54+
pub fn clone_100(b: &mut Bencher) {
55+
let src = pos(100);
56+
b.iter(|| src.clone())
57+
}
5458

5559
#[bench]
56-
pub fn build_and_clear(b: &mut Bencher) {
57-
b.iter(|| pos(BUILD_SET_SIZE).clear())
60+
pub fn clone_100_and_clear(b: &mut Bencher) {
61+
let src = pos(100);
62+
b.iter(|| src.clone().clear())
5863
}
5964

6065
#[bench]
61-
pub fn build_and_drop(b: &mut Bencher) {
62-
b.iter(|| pos(BUILD_SET_SIZE))
66+
pub fn clone_100_and_into_iter(b: &mut Bencher) {
67+
let src = pos(100);
68+
b.iter(|| src.clone().into_iter().count())
6369
}
6470

6571
#[bench]
66-
pub fn build_and_into_iter(b: &mut Bencher) {
67-
b.iter(|| pos(BUILD_SET_SIZE).into_iter().count())
72+
pub fn clone_100_and_pop_all(b: &mut Bencher) {
73+
let src = pos(100);
74+
b.iter(|| {
75+
let mut set = src.clone();
76+
while set.pop_first().is_some() {}
77+
set
78+
});
6879
}
6980

7081
#[bench]
71-
pub fn build_and_pop_all(b: &mut Bencher) {
82+
pub fn clone_100_and_remove_all(b: &mut Bencher) {
83+
let src = pos(100);
7284
b.iter(|| {
73-
let mut s = pos(BUILD_SET_SIZE);
74-
while s.pop_first().is_some() {}
75-
s
85+
let mut set = src.clone();
86+
while let Some(elt) = set.iter().copied().next() {
87+
set.remove(&elt);
88+
}
89+
set
7690
});
7791
}
7892

7993
#[bench]
80-
pub fn build_and_remove_all(b: &mut Bencher) {
94+
pub fn clone_100_and_remove_half(b: &mut Bencher) {
95+
let src = pos(100);
8196
b.iter(|| {
82-
let mut s = pos(BUILD_SET_SIZE);
83-
while let Some(elt) = s.iter().copied().next() {
84-
s.remove(&elt);
97+
let mut set = src.clone();
98+
for i in (2..=100 as i32).step_by(2) {
99+
set.remove(&i);
85100
}
86-
s
101+
assert_eq!(set.len(), 100 / 2);
102+
set
103+
})
104+
}
105+
106+
#[bench]
107+
pub fn clone_10k(b: &mut Bencher) {
108+
let src = pos(10_000);
109+
b.iter(|| src.clone())
110+
}
111+
112+
#[bench]
113+
pub fn clone_10k_and_clear(b: &mut Bencher) {
114+
let src = pos(10_000);
115+
b.iter(|| src.clone().clear())
116+
}
117+
118+
#[bench]
119+
pub fn clone_10k_and_into_iter(b: &mut Bencher) {
120+
let src = pos(10_000);
121+
b.iter(|| src.clone().into_iter().count())
122+
}
123+
124+
#[bench]
125+
pub fn clone_10k_and_pop_all(b: &mut Bencher) {
126+
let src = pos(10_000);
127+
b.iter(|| {
128+
let mut set = src.clone();
129+
while set.pop_first().is_some() {}
130+
set
131+
});
132+
}
133+
134+
#[bench]
135+
pub fn clone_10k_and_remove_all(b: &mut Bencher) {
136+
let src = pos(10_000);
137+
b.iter(|| {
138+
let mut set = src.clone();
139+
while let Some(elt) = set.iter().copied().next() {
140+
set.remove(&elt);
141+
}
142+
set
87143
});
88144
}
89145

146+
#[bench]
147+
pub fn clone_10k_and_remove_half(b: &mut Bencher) {
148+
let src = pos(10_000);
149+
b.iter(|| {
150+
let mut set = src.clone();
151+
for i in (2..=10_000 as i32).step_by(2) {
152+
set.remove(&i);
153+
}
154+
assert_eq!(set.len(), 10_000 / 2);
155+
set
156+
})
157+
}
158+
90159
set_bench! {intersection_100_neg_vs_100_pos, intersection, count, [neg(100), pos(100)]}
91160
set_bench! {intersection_100_neg_vs_10k_pos, intersection, count, [neg(100), pos(10_000)]}
92161
set_bench! {intersection_100_pos_vs_100_neg, intersection, count, [pos(100), neg(100)]}

src/liballoc/collections/btree/map.rs

+4-7
Original file line numberDiff line numberDiff line change
@@ -1487,16 +1487,13 @@ impl<K, V> Drop for IntoIter<K, V> {
14871487
}
14881488

14891489
unsafe {
1490-
let leaf_node = ptr::read(&self.front).into_node();
1491-
if leaf_node.is_shared_root() {
1490+
let mut node = ptr::read(&self.front).into_node().forget_type();
1491+
if node.is_shared_root() {
14921492
return;
14931493
}
14941494

1495-
if let Some(first_parent) = leaf_node.deallocate_and_ascend() {
1496-
let mut cur_internal_node = first_parent.into_node();
1497-
while let Some(parent) = cur_internal_node.deallocate_and_ascend() {
1498-
cur_internal_node = parent.into_node()
1499-
}
1495+
while let Some(parent) = node.deallocate_and_ascend() {
1496+
node = parent.into_node().forget_type();
15001497
}
15011498
}
15021499
}

0 commit comments

Comments
 (0)