Skip to content

Commit d329f74

Browse files
authored
Rollup merge of rust-lang#71510 - ssomers:btreemap_iter_intertwined, r=Mark-Simulacrum
Btreemap iter intertwined 3 commits: 1. Introduced benchmarks for `BTreeMap::iter()`. Benchmarks named `iter_20` were of the whole iteration process, so I renamed them. Also the benchmarks of `range` that I wrote earlier weren't very good. I included an (awkwardly named) one that compares `iter()` to `range(..)` on the same set, because the contrast is surprising: ``` name ns/iter btree::map::range_unbounded_unbounded 28,176 btree::map::range_unbounded_vs_iter 89,369 ``` Both dig up the same pair of leaf edges. `range(..)` also checks that some keys are correctly ordered, the only thing `iter()` does more is to copy the map's length. 2. Slightly refactoring the code to what I find more readable (not in chronological order of discovery), boosts performance: ``` >cargo-benchcmp.exe benchcmp a1 a2 --threshold 5 name a1 ns/iter a2 ns/iter diff ns/iter diff % speedup btree::map::find_rand_100 18 17 -1 -5.56% x 1.06 btree::map::first_and_last_10k 64 71 7 10.94% x 0.90 btree::map::iter_0 2,939 2,209 -730 -24.84% x 1.33 btree::map::iter_1 6,845 2,696 -4,149 -60.61% x 2.54 btree::map::iter_100 8,556 3,672 -4,884 -57.08% x 2.33 btree::map::iter_10k 9,292 5,884 -3,408 -36.68% x 1.58 btree::map::iter_1m 10,268 6,510 -3,758 -36.60% x 1.58 btree::map::iteration_mut_100000 478,575 453,050 -25,525 -5.33% x 1.06 btree::map::range_unbounded_unbounded 28,176 36,169 7,993 28.37% x 0.78 btree::map::range_unbounded_vs_iter 89,369 38,290 -51,079 -57.16% x 2.33 btree::set::clone_100_and_remove_all 4,801 4,245 -556 -11.58% x 1.13 btree::set::clone_10k_and_remove_all 529,450 496,030 -33,420 -6.31% x 1.07 ``` But you can tell from the `range_unbounded_*` lines that, despite an unwarranted, vengeful attack on the range_unbounded_unbounded benchmark, this change still doesn't allow `iter()` to catch up with `range(..)`. 3. I guess that `range(..)` copes so well because it intertwines the leftmost and rightmost descend towards leaf edges, doing the two root node accesses close together, perhaps exploiting a CPU's internal pipelining? So the third commit distils a version of `range_search` (which we can't use directly because of the `Ord` bound), and we get another boost: ``` cargo-benchcmp.exe benchcmp a2 a3 --threshold 5 name a2 ns/iter a3 ns/iter diff ns/iter diff % speedup btree::map::first_and_last_100 40 43 3 7.50% x 0.93 btree::map::first_and_last_10k 71 64 -7 -9.86% x 1.11 btree::map::iter_0 2,209 1,719 -490 -22.18% x 1.29 btree::map::iter_1 2,696 2,205 -491 -18.21% x 1.22 btree::map::iter_100 3,672 2,943 -729 -19.85% x 1.25 btree::map::iter_10k 5,884 3,929 -1,955 -33.23% x 1.50 btree::map::iter_1m 6,510 5,532 -978 -15.02% x 1.18 btree::map::iteration_mut_100000 453,050 476,667 23,617 5.21% x 0.95 btree::map::range_included_excluded 405,075 371,297 -33,778 -8.34% x 1.09 btree::map::range_included_included 427,577 397,440 -30,137 -7.05% x 1.08 btree::map::range_unbounded_unbounded 36,169 28,175 -7,994 -22.10% x 1.28 btree::map::range_unbounded_vs_iter 38,290 30,838 -7,452 -19.46% x 1.24 ``` But I think this is just fake news from the microbenchmarking media. `iter()` is still trying to catch up with `range(..)`. And we can sure do without another function. So I would skip this 3rd commit. r? @Mark-Simulacrum
2 parents 793f921 + 8730227 commit d329f74

File tree

2 files changed

+119
-75
lines changed

2 files changed

+119
-75
lines changed

src/liballoc/benches/btree/map.rs

+72-46
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::collections::BTreeMap;
22
use std::iter::Iterator;
3-
use std::ops::Bound::{Excluded, Unbounded};
3+
use std::ops::RangeBounds;
44
use std::vec::Vec;
55

66
use rand::{seq::SliceRandom, thread_rng, Rng};
@@ -117,7 +117,7 @@ map_find_rand_bench! {find_rand_10_000, 10_000, BTreeMap}
117117
map_find_seq_bench! {find_seq_100, 100, BTreeMap}
118118
map_find_seq_bench! {find_seq_10_000, 10_000, BTreeMap}
119119

120-
fn bench_iter(b: &mut Bencher, size: i32) {
120+
fn bench_iteration(b: &mut Bencher, size: i32) {
121121
let mut map = BTreeMap::<i32, i32>::new();
122122
let mut rng = thread_rng();
123123

@@ -133,21 +133,21 @@ fn bench_iter(b: &mut Bencher, size: i32) {
133133
}
134134

135135
#[bench]
136-
pub fn iter_20(b: &mut Bencher) {
137-
bench_iter(b, 20);
136+
pub fn iteration_20(b: &mut Bencher) {
137+
bench_iteration(b, 20);
138138
}
139139

140140
#[bench]
141-
pub fn iter_1000(b: &mut Bencher) {
142-
bench_iter(b, 1000);
141+
pub fn iteration_1000(b: &mut Bencher) {
142+
bench_iteration(b, 1000);
143143
}
144144

145145
#[bench]
146-
pub fn iter_100000(b: &mut Bencher) {
147-
bench_iter(b, 100000);
146+
pub fn iteration_100000(b: &mut Bencher) {
147+
bench_iteration(b, 100000);
148148
}
149149

150-
fn bench_iter_mut(b: &mut Bencher, size: i32) {
150+
fn bench_iteration_mut(b: &mut Bencher, size: i32) {
151151
let mut map = BTreeMap::<i32, i32>::new();
152152
let mut rng = thread_rng();
153153

@@ -163,18 +163,18 @@ fn bench_iter_mut(b: &mut Bencher, size: i32) {
163163
}
164164

165165
#[bench]
166-
pub fn iter_mut_20(b: &mut Bencher) {
167-
bench_iter_mut(b, 20);
166+
pub fn iteration_mut_20(b: &mut Bencher) {
167+
bench_iteration_mut(b, 20);
168168
}
169169

170170
#[bench]
171-
pub fn iter_mut_1000(b: &mut Bencher) {
172-
bench_iter_mut(b, 1000);
171+
pub fn iteration_mut_1000(b: &mut Bencher) {
172+
bench_iteration_mut(b, 1000);
173173
}
174174

175175
#[bench]
176-
pub fn iter_mut_100000(b: &mut Bencher) {
177-
bench_iter_mut(b, 100000);
176+
pub fn iteration_mut_100000(b: &mut Bencher) {
177+
bench_iteration_mut(b, 100000);
178178
}
179179

180180
fn bench_first_and_last(b: &mut Bencher, size: i32) {
@@ -202,57 +202,83 @@ pub fn first_and_last_10k(b: &mut Bencher) {
202202
bench_first_and_last(b, 10_000);
203203
}
204204

205-
#[bench]
206-
pub fn range_excluded_excluded(b: &mut Bencher) {
207-
let size = 144;
208-
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
205+
const BENCH_RANGE_SIZE: i32 = 145;
206+
const BENCH_RANGE_COUNT: i32 = BENCH_RANGE_SIZE * (BENCH_RANGE_SIZE - 1) / 2;
207+
208+
fn bench_range<F, R>(b: &mut Bencher, f: F)
209+
where
210+
F: Fn(i32, i32) -> R,
211+
R: RangeBounds<i32>,
212+
{
213+
let map: BTreeMap<_, _> = (0..BENCH_RANGE_SIZE).map(|i| (i, i)).collect();
209214
b.iter(|| {
210-
for first in 0..size {
211-
for last in first + 1..size {
212-
black_box(map.range((Excluded(first), Excluded(last))));
215+
let mut c = 0;
216+
for i in 0..BENCH_RANGE_SIZE {
217+
for j in i + 1..BENCH_RANGE_SIZE {
218+
black_box(map.range(f(i, j)));
219+
c += 1;
213220
}
214221
}
222+
debug_assert_eq!(c, BENCH_RANGE_COUNT);
215223
});
216224
}
217225

218226
#[bench]
219-
pub fn range_excluded_unbounded(b: &mut Bencher) {
220-
let size = 144;
221-
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
222-
b.iter(|| {
223-
for first in 0..size {
224-
black_box(map.range((Excluded(first), Unbounded)));
225-
}
226-
});
227+
pub fn range_included_excluded(b: &mut Bencher) {
228+
bench_range(b, |i, j| i..j);
227229
}
228230

229231
#[bench]
230232
pub fn range_included_included(b: &mut Bencher) {
231-
let size = 144;
232-
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
233-
b.iter(|| {
234-
for first in 0..size {
235-
for last in first..size {
236-
black_box(map.range(first..=last));
237-
}
238-
}
239-
});
233+
bench_range(b, |i, j| i..=j);
240234
}
241235

242236
#[bench]
243237
pub fn range_included_unbounded(b: &mut Bencher) {
244-
let size = 144;
238+
bench_range(b, |i, _| i..);
239+
}
240+
241+
#[bench]
242+
pub fn range_unbounded_unbounded(b: &mut Bencher) {
243+
bench_range(b, |_, _| ..);
244+
}
245+
246+
fn bench_iter(b: &mut Bencher, repeats: i32, size: i32) {
245247
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
246248
b.iter(|| {
247-
for first in 0..size {
248-
black_box(map.range(first..));
249+
for _ in 0..repeats {
250+
black_box(map.iter());
249251
}
250252
});
251253
}
252254

255+
/// Contrast range_unbounded_unbounded with `iter()`.
253256
#[bench]
254-
pub fn range_unbounded_unbounded(b: &mut Bencher) {
255-
let size = 144;
256-
let map: BTreeMap<_, _> = (0..size).map(|i| (i, i)).collect();
257-
b.iter(|| map.range(..));
257+
pub fn range_unbounded_vs_iter(b: &mut Bencher) {
258+
bench_iter(b, BENCH_RANGE_COUNT, BENCH_RANGE_SIZE);
259+
}
260+
261+
#[bench]
262+
pub fn iter_0(b: &mut Bencher) {
263+
bench_iter(b, 1_000, 0);
264+
}
265+
266+
#[bench]
267+
pub fn iter_1(b: &mut Bencher) {
268+
bench_iter(b, 1_000, 1);
269+
}
270+
271+
#[bench]
272+
pub fn iter_100(b: &mut Bencher) {
273+
bench_iter(b, 1_000, 100);
274+
}
275+
276+
#[bench]
277+
pub fn iter_10k(b: &mut Bencher) {
278+
bench_iter(b, 1_000, 10_000);
279+
}
280+
281+
#[bench]
282+
pub fn iter_1m(b: &mut Bencher) {
283+
bench_iter(b, 1_000, 1_000_000);
258284
}

src/liballoc/collections/btree/map.rs

+47-29
Original file line numberDiff line numberDiff line change
@@ -1540,16 +1540,10 @@ impl<K, V> IntoIterator for BTreeMap<K, V> {
15401540

15411541
fn into_iter(self) -> IntoIter<K, V> {
15421542
let mut me = ManuallyDrop::new(self);
1543-
if let Some(root) = me.root.as_mut() {
1544-
let root1 = unsafe { ptr::read(root).into_ref() };
1545-
let root2 = unsafe { ptr::read(root).into_ref() };
1546-
let len = me.length;
1547-
1548-
IntoIter {
1549-
front: Some(root1.first_leaf_edge()),
1550-
back: Some(root2.last_leaf_edge()),
1551-
length: len,
1552-
}
1543+
if let Some(root) = me.root.take() {
1544+
let (f, b) = full_range_search(root.into_ref());
1545+
1546+
IntoIter { front: Some(f), back: Some(b), length: me.length }
15531547
} else {
15541548
IntoIter { front: None, back: None, length: 0 }
15551549
}
@@ -2037,6 +2031,7 @@ where
20372031
}
20382032
}
20392033

2034+
/// Finds the leaf edges delimiting a specified range in or underneath a node.
20402035
fn range_search<BorrowType, K, V, Q: ?Sized, R: RangeBounds<Q>>(
20412036
root: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
20422037
range: R,
@@ -2122,6 +2117,33 @@ where
21222117
}
21232118
}
21242119

2120+
/// Equivalent to `range_search(k, v, ..)` without the `Ord` bound.
2121+
fn full_range_search<BorrowType, K, V>(
2122+
root: NodeRef<BorrowType, K, V, marker::LeafOrInternal>,
2123+
) -> (
2124+
Handle<NodeRef<BorrowType, K, V, marker::Leaf>, marker::Edge>,
2125+
Handle<NodeRef<BorrowType, K, V, marker::Leaf>, marker::Edge>,
2126+
) {
2127+
// We duplicate the root NodeRef here -- we will never access it in a way
2128+
// that overlaps references obtained from the root.
2129+
let mut min_node = unsafe { ptr::read(&root) };
2130+
let mut max_node = root;
2131+
loop {
2132+
let front = min_node.first_edge();
2133+
let back = max_node.last_edge();
2134+
match (front.force(), back.force()) {
2135+
(Leaf(f), Leaf(b)) => {
2136+
return (f, b);
2137+
}
2138+
(Internal(min_int), Internal(max_int)) => {
2139+
min_node = min_int.descend();
2140+
max_node = max_int.descend();
2141+
}
2142+
_ => unreachable!("BTreeMap has different depths"),
2143+
};
2144+
}
2145+
}
2146+
21252147
impl<K, V> BTreeMap<K, V> {
21262148
/// Gets an iterator over the entries of the map, sorted by key.
21272149
///
@@ -2146,12 +2168,12 @@ impl<K, V> BTreeMap<K, V> {
21462168
/// ```
21472169
#[stable(feature = "rust1", since = "1.0.0")]
21482170
pub fn iter(&self) -> Iter<'_, K, V> {
2149-
Iter {
2150-
range: Range {
2151-
front: self.root.as_ref().map(|r| r.as_ref().first_leaf_edge()),
2152-
back: self.root.as_ref().map(|r| r.as_ref().last_leaf_edge()),
2153-
},
2154-
length: self.length,
2171+
if let Some(root) = &self.root {
2172+
let (f, b) = full_range_search(root.as_ref());
2173+
2174+
Iter { range: Range { front: Some(f), back: Some(b) }, length: self.length }
2175+
} else {
2176+
Iter { range: Range { front: None, back: None }, length: 0 }
21552177
}
21562178
}
21572179

@@ -2178,19 +2200,15 @@ impl<K, V> BTreeMap<K, V> {
21782200
/// ```
21792201
#[stable(feature = "rust1", since = "1.0.0")]
21802202
pub fn iter_mut(&mut self) -> IterMut<'_, K, V> {
2181-
IterMut {
2182-
range: if let Some(root) = &mut self.root {
2183-
let root1 = root.as_mut();
2184-
let root2 = unsafe { ptr::read(&root1) };
2185-
RangeMut {
2186-
front: Some(root1.first_leaf_edge()),
2187-
back: Some(root2.last_leaf_edge()),
2188-
_marker: PhantomData,
2189-
}
2190-
} else {
2191-
RangeMut { front: None, back: None, _marker: PhantomData }
2192-
},
2193-
length: self.length,
2203+
if let Some(root) = &mut self.root {
2204+
let (f, b) = full_range_search(root.as_mut());
2205+
2206+
IterMut {
2207+
range: RangeMut { front: Some(f), back: Some(b), _marker: PhantomData },
2208+
length: self.length,
2209+
}
2210+
} else {
2211+
IterMut { range: RangeMut { front: None, back: None, _marker: PhantomData }, length: 0 }
21942212
}
21952213
}
21962214

0 commit comments

Comments
 (0)