Skip to content

Commit 718ba0d

Browse files
authored
Rollup merge of #68770 - ssomers:btree_drain_filter, r=Amanieu
BTreeMap/BTreeSet: implement drain_filter Provide an implementation of drain_filter for BTreeMap and BTreeSet. Should be optimal when the predicate picks only elements in leaf nodes with at least MIN_LEN remaining elements, which is a common case, at least when draining only a fraction of the map/set, and also when the predicate picks elements stored in internal nodes where the right subtree can easily let go of a replacement element. The first commit adds benchmarks with an external, naive implementation. to compare how much this claimed optimality-in-some-cases is actually worth.
2 parents a5b09d3 + 0405db3 commit 718ba0d

File tree

7 files changed

+671
-14
lines changed

7 files changed

+671
-14
lines changed

src/liballoc/benches/btree/set.rs

+32
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,22 @@ pub fn clone_100_and_clear(b: &mut Bencher) {
6262
b.iter(|| src.clone().clear())
6363
}
6464

65+
#[bench]
66+
pub fn clone_100_and_drain_all(b: &mut Bencher) {
67+
let src = pos(100);
68+
b.iter(|| src.clone().drain_filter(|_| true).count())
69+
}
70+
71+
#[bench]
72+
pub fn clone_100_and_drain_half(b: &mut Bencher) {
73+
let src = pos(100);
74+
b.iter(|| {
75+
let mut set = src.clone();
76+
assert_eq!(set.drain_filter(|i| i % 2 == 0).count(), 100 / 2);
77+
assert_eq!(set.len(), 100 / 2);
78+
})
79+
}
80+
6581
#[bench]
6682
pub fn clone_100_and_into_iter(b: &mut Bencher) {
6783
let src = pos(100);
@@ -115,6 +131,22 @@ pub fn clone_10k_and_clear(b: &mut Bencher) {
115131
b.iter(|| src.clone().clear())
116132
}
117133

134+
#[bench]
135+
pub fn clone_10k_and_drain_all(b: &mut Bencher) {
136+
let src = pos(10_000);
137+
b.iter(|| src.clone().drain_filter(|_| true).count())
138+
}
139+
140+
#[bench]
141+
pub fn clone_10k_and_drain_half(b: &mut Bencher) {
142+
let src = pos(10_000);
143+
b.iter(|| {
144+
let mut set = src.clone();
145+
assert_eq!(set.drain_filter(|i| i % 2 == 0).count(), 10_000 / 2);
146+
assert_eq!(set.len(), 10_000 / 2);
147+
})
148+
}
149+
118150
#[bench]
119151
pub fn clone_10k_and_into_iter(b: &mut Bencher) {
120152
let src = pos(10_000);

src/liballoc/benches/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#![feature(btree_drain_filter)]
12
#![feature(map_first_last)]
23
#![feature(repr_simd)]
34
#![feature(test)]

src/liballoc/collections/btree/map.rs

+200-10
Original file line numberDiff line numberDiff line change
@@ -1256,6 +1256,48 @@ impl<K: Ord, V> BTreeMap<K, V> {
12561256
right
12571257
}
12581258

1259+
/// Creates an iterator which uses a closure to determine if an element should be removed.
1260+
///
1261+
/// If the closure returns true, the element is removed from the map and yielded.
1262+
/// If the closure returns false, or panics, the element remains in the map and will not be
1263+
/// yielded.
1264+
///
1265+
/// Note that `drain_filter` lets you mutate every value in the filter closure, regardless of
1266+
/// whether you choose to keep or remove it.
1267+
///
1268+
/// If the iterator is only partially consumed or not consumed at all, each of the remaining
1269+
/// elements will still be subjected to the closure and removed and dropped if it returns true.
1270+
///
1271+
/// It is unspecified how many more elements will be subjected to the closure
1272+
/// if a panic occurs in the closure, or a panic occurs while dropping an element,
1273+
/// or if the `DrainFilter` value is leaked.
1274+
///
1275+
/// # Examples
1276+
///
1277+
/// Splitting a map into even and odd keys, reusing the original map:
1278+
///
1279+
/// ```
1280+
/// #![feature(btree_drain_filter)]
1281+
/// use std::collections::BTreeMap;
1282+
///
1283+
/// let mut map: BTreeMap<i32, i32> = (0..8).map(|x| (x, x)).collect();
1284+
/// let evens: BTreeMap<_, _> = map.drain_filter(|k, _v| k % 2 == 0).collect();
1285+
/// let odds = map;
1286+
/// assert_eq!(evens.keys().copied().collect::<Vec<_>>(), vec![0, 2, 4, 6]);
1287+
/// assert_eq!(odds.keys().copied().collect::<Vec<_>>(), vec![1, 3, 5, 7]);
1288+
/// ```
1289+
#[unstable(feature = "btree_drain_filter", issue = "70530")]
1290+
pub fn drain_filter<F>(&mut self, pred: F) -> DrainFilter<'_, K, V, F>
1291+
where
1292+
F: FnMut(&K, &mut V) -> bool,
1293+
{
1294+
DrainFilter { pred, inner: self.drain_filter_inner() }
1295+
}
1296+
pub(super) fn drain_filter_inner(&mut self) -> DrainFilterInner<'_, K, V> {
1297+
let front = self.root.as_mut().map(|r| r.as_mut().first_leaf_edge());
1298+
DrainFilterInner { length: &mut self.length, cur_leaf_edge: front }
1299+
}
1300+
12591301
/// Calculates the number of elements if it is incorrect.
12601302
fn recalc_length(&mut self) {
12611303
fn dfs<'a, K, V>(node: NodeRef<marker::Immut<'a>, K, V, marker::LeafOrInternal>) -> usize
@@ -1653,6 +1695,124 @@ impl<K, V> Clone for Values<'_, K, V> {
16531695
}
16541696
}
16551697

1698+
/// An iterator produced by calling `drain_filter` on BTreeMap.
1699+
#[unstable(feature = "btree_drain_filter", issue = "70530")]
1700+
pub struct DrainFilter<'a, K, V, F>
1701+
where
1702+
K: 'a + Ord, // This Ord bound should be removed before stabilization.
1703+
V: 'a,
1704+
F: 'a + FnMut(&K, &mut V) -> bool,
1705+
{
1706+
pred: F,
1707+
inner: DrainFilterInner<'a, K, V>,
1708+
}
1709+
pub(super) struct DrainFilterInner<'a, K, V>
1710+
where
1711+
K: 'a + Ord,
1712+
V: 'a,
1713+
{
1714+
length: &'a mut usize,
1715+
cur_leaf_edge: Option<Handle<NodeRef<marker::Mut<'a>, K, V, marker::Leaf>, marker::Edge>>,
1716+
}
1717+
1718+
#[unstable(feature = "btree_drain_filter", issue = "70530")]
1719+
impl<'a, K, V, F> Drop for DrainFilter<'a, K, V, F>
1720+
where
1721+
K: 'a + Ord,
1722+
V: 'a,
1723+
F: 'a + FnMut(&K, &mut V) -> bool,
1724+
{
1725+
fn drop(&mut self) {
1726+
self.for_each(drop);
1727+
}
1728+
}
1729+
1730+
#[unstable(feature = "btree_drain_filter", issue = "70530")]
1731+
impl<'a, K, V, F> fmt::Debug for DrainFilter<'a, K, V, F>
1732+
where
1733+
K: 'a + fmt::Debug + Ord,
1734+
V: 'a + fmt::Debug,
1735+
F: 'a + FnMut(&K, &mut V) -> bool,
1736+
{
1737+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1738+
f.debug_tuple("DrainFilter").field(&self.inner.peek()).finish()
1739+
}
1740+
}
1741+
1742+
#[unstable(feature = "btree_drain_filter", issue = "70530")]
1743+
impl<'a, K, V, F> Iterator for DrainFilter<'a, K, V, F>
1744+
where
1745+
K: 'a + Ord,
1746+
V: 'a,
1747+
F: 'a + FnMut(&K, &mut V) -> bool,
1748+
{
1749+
type Item = (K, V);
1750+
1751+
fn next(&mut self) -> Option<(K, V)> {
1752+
self.inner.next(&mut self.pred)
1753+
}
1754+
1755+
fn size_hint(&self) -> (usize, Option<usize>) {
1756+
self.inner.size_hint()
1757+
}
1758+
}
1759+
1760+
impl<'a, K, V> DrainFilterInner<'a, K, V>
1761+
where
1762+
K: 'a + Ord,
1763+
V: 'a,
1764+
{
1765+
/// Allow Debug implementations to predict the next element.
1766+
pub(super) fn peek(&self) -> Option<(&K, &V)> {
1767+
let edge = self.cur_leaf_edge.as_ref()?;
1768+
edge.reborrow().next_kv().ok().map(|kv| kv.into_kv())
1769+
}
1770+
1771+
unsafe fn next_kv(
1772+
&mut self,
1773+
) -> Option<Handle<NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal>, marker::KV>> {
1774+
let edge = self.cur_leaf_edge.as_ref()?;
1775+
ptr::read(edge).next_kv().ok()
1776+
}
1777+
1778+
/// Implementation of a typical `DrainFilter::next` method, given the predicate.
1779+
pub(super) fn next<F>(&mut self, pred: &mut F) -> Option<(K, V)>
1780+
where
1781+
F: FnMut(&K, &mut V) -> bool,
1782+
{
1783+
while let Some(kv) = unsafe { self.next_kv() } {
1784+
let (k, v) = unsafe { ptr::read(&kv) }.into_kv_mut();
1785+
if pred(k, v) {
1786+
*self.length -= 1;
1787+
let (k, v, leaf_edge_location) = kv.remove_kv_tracking();
1788+
// `remove_kv_tracking` has either preserved or invalidated `self.cur_leaf_edge`
1789+
if let Some(node) = leaf_edge_location {
1790+
match search::search_tree(node, &k) {
1791+
search::SearchResult::Found(_) => unreachable!(),
1792+
search::SearchResult::GoDown(leaf) => self.cur_leaf_edge = Some(leaf),
1793+
}
1794+
};
1795+
return Some((k, v));
1796+
}
1797+
self.cur_leaf_edge = Some(kv.next_leaf_edge());
1798+
}
1799+
None
1800+
}
1801+
1802+
/// Implementation of a typical `DrainFilter::size_hint` method.
1803+
pub(super) fn size_hint(&self) -> (usize, Option<usize>) {
1804+
(0, Some(*self.length))
1805+
}
1806+
}
1807+
1808+
#[unstable(feature = "btree_drain_filter", issue = "70530")]
1809+
impl<K, V, F> FusedIterator for DrainFilter<'_, K, V, F>
1810+
where
1811+
K: Ord,
1812+
F: FnMut(&K, &mut V) -> bool,
1813+
{
1814+
}
1815+
16561816
#[stable(feature = "btree_range", since = "1.17.0")]
16571817
impl<'a, K, V> Iterator for Range<'a, K, V> {
16581818
type Item = (&'a K, &'a V);
@@ -2531,12 +2691,31 @@ impl<'a, K: Ord, V> OccupiedEntry<'a, K, V> {
25312691
fn remove_kv(self) -> (K, V) {
25322692
*self.length -= 1;
25332693

2534-
let (small_leaf, old_key, old_val) = match self.handle.force() {
2694+
let (old_key, old_val, _) = self.handle.remove_kv_tracking();
2695+
(old_key, old_val)
2696+
}
2697+
}
2698+
2699+
impl<'a, K: 'a, V: 'a> Handle<NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal>, marker::KV> {
2700+
/// Removes a key/value-pair from the map, and returns that pair, as well as
2701+
/// the whereabouts of the leaf edge corresponding to that former pair:
2702+
/// if None is returned, the leaf edge is still the left leaf edge of the KV handle;
2703+
/// if a node is returned, it heads the subtree where the leaf edge may be found.
2704+
fn remove_kv_tracking(
2705+
self,
2706+
) -> (K, V, Option<NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal>>) {
2707+
let mut levels_down_handled: isize;
2708+
let (small_leaf, old_key, old_val) = match self.force() {
25352709
Leaf(leaf) => {
2710+
levels_down_handled = 1; // handled at same level, but affects only the right side
25362711
let (hole, old_key, old_val) = leaf.remove();
25372712
(hole.into_node(), old_key, old_val)
25382713
}
25392714
Internal(mut internal) => {
2715+
// Replace the location freed in the internal node with the next KV,
2716+
// and remove that next KV from its leaf.
2717+
levels_down_handled = unsafe { ptr::read(&internal).into_node().height() } as isize;
2718+
25402719
let key_loc = internal.kv_mut().0 as *mut K;
25412720
let val_loc = internal.kv_mut().1 as *mut V;
25422721

@@ -2556,27 +2735,39 @@ impl<'a, K: Ord, V> OccupiedEntry<'a, K, V> {
25562735
let mut cur_node = small_leaf.forget_type();
25572736
while cur_node.len() < node::MIN_LEN {
25582737
match handle_underfull_node(cur_node) {
2559-
AtRoot => break,
2738+
AtRoot(root) => {
2739+
cur_node = root;
2740+
break;
2741+
}
25602742
EmptyParent(_) => unreachable!(),
25612743
Merged(parent) => {
2744+
levels_down_handled -= 1;
25622745
if parent.len() == 0 {
25632746
// We must be at the root
2564-
parent.into_root_mut().pop_level();
2747+
let root = parent.into_root_mut();
2748+
root.pop_level();
2749+
cur_node = root.as_mut();
25652750
break;
25662751
} else {
25672752
cur_node = parent.forget_type();
25682753
}
25692754
}
2570-
Stole(_) => break,
2755+
Stole(internal_node) => {
2756+
levels_down_handled -= 1;
2757+
cur_node = internal_node.forget_type();
2758+
// This internal node might be underfull, but only if it's the root.
2759+
break;
2760+
}
25712761
}
25722762
}
25732763

2574-
(old_key, old_val)
2764+
let leaf_edge_location = if levels_down_handled > 0 { None } else { Some(cur_node) };
2765+
(old_key, old_val, leaf_edge_location)
25752766
}
25762767
}
25772768

25782769
enum UnderflowResult<'a, K, V> {
2579-
AtRoot,
2770+
AtRoot(NodeRef<marker::Mut<'a>, K, V, marker::LeafOrInternal>),
25802771
EmptyParent(NodeRef<marker::Mut<'a>, K, V, marker::Internal>),
25812772
Merged(NodeRef<marker::Mut<'a>, K, V, marker::Internal>),
25822773
Stole(NodeRef<marker::Mut<'a>, K, V, marker::Internal>),
@@ -2585,10 +2776,9 @@ enum UnderflowResult<'a, K, V> {
25852776
fn handle_underfull_node<K, V>(
25862777
node: NodeRef<marker::Mut<'_>, K, V, marker::LeafOrInternal>,
25872778
) -> UnderflowResult<'_, K, V> {
2588-
let parent = if let Ok(parent) = node.ascend() {
2589-
parent
2590-
} else {
2591-
return AtRoot;
2779+
let parent = match node.ascend() {
2780+
Ok(parent) => parent,
2781+
Err(root) => return AtRoot(root),
25922782
};
25932783

25942784
let (is_left, mut handle) = match parent.left_kv() {

0 commit comments

Comments
 (0)