Skip to content

Commit d132a70

Browse files
committed
BTreeSet intersection, difference & is_subset optimizations
1 parent 702b45e commit d132a70

File tree

2 files changed

+247
-93
lines changed

2 files changed

+247
-93
lines changed

src/liballoc/collections/btree/set.rs

+155-75
Original file line number | Diff line number | Diff line change
@@ -122,13 +122,16 @@ pub struct Difference<'a, T: 'a> {
122122
}
123123
enum DifferenceInner<'a, T: 'a> {
124124
Stitch {
125+
// iterate all of self and some of other, spotting matches along the way
125126
self_iter: Iter<'a, T>,
126127
other_iter: Peekable<Iter<'a, T>>,
127128
},
128129
Search {
130+
// iterate a small set, look up in the large set
129131
self_iter: Iter<'a, T>,
130132
other_set: &'a BTreeSet<T>,
131133
},
134+
Iterate(Iter<'a, T>), // simply stream self's elements
132135
}
133136

134137
#[stable(feature = "collection_debug", since = "1.17.0")]
@@ -147,6 +150,7 @@ impl<T: fmt::Debug> fmt::Debug for Difference<'_, T> {
147150
self_iter,
148151
other_set: _,
149152
} => f.debug_tuple("Difference").field(&self_iter).finish(),
153+
DifferenceInner::Iterate(iter) => f.debug_tuple("Difference").field(&iter).finish(),
150154
}
151155
}
152156
}
@@ -187,13 +191,16 @@ pub struct Intersection<'a, T: 'a> {
187191
}
188192
enum IntersectionInner<'a, T: 'a> {
189193
Stitch {
194+
// iterate similarly sized sets jointly, spotting matches along the way
190195
a: Iter<'a, T>,
191196
b: Iter<'a, T>,
192197
},
193198
Search {
199+
// iterate a small set, look up in the large set
194200
small_iter: Iter<'a, T>,
195201
large_set: &'a BTreeSet<T>,
196202
},
203+
Answer(Option<&'a T>), // return a specific value or emptiness
197204
}
198205

199206
#[stable(feature = "collection_debug", since = "1.17.0")]
@@ -212,6 +219,9 @@ impl<T: fmt::Debug> fmt::Debug for Intersection<'_, T> {
212219
small_iter,
213220
large_set: _,
214221
} => f.debug_tuple("Intersection").field(&small_iter).finish(),
222+
IntersectionInner::Answer(answer) => {
223+
f.debug_tuple("Intersection").field(&answer).finish()
224+
}
215225
}
216226
}
217227
}
@@ -314,24 +324,51 @@ impl<T: Ord> BTreeSet<T> {
314324
/// ```
315325
#[stable(feature = "rust1", since = "1.0.0")]
316326
pub fn difference<'a>(&'a self, other: &'a BTreeSet<T>) -> Difference<'a, T> {
317-
if self.len() > other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
318-
// Self is bigger than or not much smaller than other set.
319-
// Iterate both sets jointly, spotting matches along the way.
320-
Difference {
321-
inner: DifferenceInner::Stitch {
322-
self_iter: self.iter(),
323-
other_iter: other.iter().peekable(),
324-
},
325-
}
327+
let (self_min, self_max) = if let (Some(self_min), Some(self_max)) =
328+
(self.iter().next(), self.iter().next_back())
329+
{
330+
(self_min, self_max)
326331
} else {
327-
// Self is much smaller than other set, or both sets are empty.
328-
// Iterate the small set, searching for matches in the large set.
329-
Difference {
330-
inner: DifferenceInner::Search {
331-
self_iter: self.iter(),
332-
other_set: other,
333-
},
334-
}
332+
return Difference {
333+
inner: DifferenceInner::Iterate(self.iter()),
334+
};
335+
};
336+
let (other_min, other_max) = if let (Some(other_min), Some(other_max)) =
337+
(other.iter().next(), other.iter().next_back())
338+
{
339+
(other_min, other_max)
340+
} else {
341+
return Difference {
342+
inner: DifferenceInner::Iterate(self.iter()),
343+
};
344+
};
345+
Difference {
346+
inner: match (self_min.cmp(other_max), self_max.cmp(other_min)) {
347+
(Greater, _) | (_, Less) => DifferenceInner::Iterate(self.iter()),
348+
(Equal, _) => {
349+
let mut self_iter = self.iter();
350+
self_iter.next();
351+
DifferenceInner::Iterate(self_iter)
352+
}
353+
(_, Equal) => {
354+
let mut self_iter = self.iter();
355+
self_iter.next_back();
356+
DifferenceInner::Iterate(self_iter)
357+
}
358+
_ => {
359+
if self.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
360+
DifferenceInner::Search {
361+
self_iter: self.iter(),
362+
other_set: other,
363+
}
364+
} else {
365+
DifferenceInner::Stitch {
366+
self_iter: self.iter(),
367+
other_iter: other.iter().peekable(),
368+
}
369+
}
370+
}
371+
},
335372
}
336373
}
337374

@@ -387,29 +424,48 @@ impl<T: Ord> BTreeSet<T> {
387424
/// ```
388425
#[stable(feature = "rust1", since = "1.0.0")]
389426
pub fn intersection<'a>(&'a self, other: &'a BTreeSet<T>) -> Intersection<'a, T> {
390-
let (small, other) = if self.len() <= other.len() {
391-
(self, other)
427+
let (self_min, self_max) = if let (Some(self_min), Some(self_max)) =
428+
(self.iter().next(), self.iter().next_back())
429+
{
430+
(self_min, self_max)
392431
} else {
393-
(other, self)
432+
return Intersection {
433+
inner: IntersectionInner::Answer(None),
434+
};
394435
};
395-
if small.len() > other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
396-
// Small set is not much smaller than other set.
397-
// Iterate both sets jointly, spotting matches along the way.
398-
Intersection {
399-
inner: IntersectionInner::Stitch {
400-
a: small.iter(),
401-
b: other.iter(),
402-
},
403-
}
436+
let (other_min, other_max) = if let (Some(other_min), Some(other_max)) =
437+
(other.iter().next(), other.iter().next_back())
438+
{
439+
(other_min, other_max)
404440
} else {
405-
// Big difference in number of elements, or both sets are empty.
406-
// Iterate the small set, searching for matches in the large set.
407-
Intersection {
408-
inner: IntersectionInner::Search {
409-
small_iter: small.iter(),
410-
large_set: other,
411-
},
412-
}
441+
return Intersection {
442+
inner: IntersectionInner::Answer(None),
443+
};
444+
};
445+
Intersection {
446+
inner: match (self_min.cmp(other_max), self_max.cmp(other_min)) {
447+
(Greater, _) | (_, Less) => IntersectionInner::Answer(None),
448+
(Equal, _) => IntersectionInner::Answer(Some(self_min)),
449+
(_, Equal) => IntersectionInner::Answer(Some(self_max)),
450+
_ => {
451+
if self.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
452+
IntersectionInner::Search {
453+
small_iter: self.iter(),
454+
large_set: other,
455+
}
456+
} else if other.len() <= self.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
457+
IntersectionInner::Search {
458+
small_iter: other.iter(),
459+
large_set: self,
460+
}
461+
} else {
462+
IntersectionInner::Stitch {
463+
a: self.iter(),
464+
b: other.iter(),
465+
}
466+
}
467+
}
468+
},
413469
}
414470
}
415471

@@ -544,43 +600,61 @@ impl<T: Ord> BTreeSet<T> {
544600
#[stable(feature = "rust1", since = "1.0.0")]
545601
pub fn is_subset(&self, other: &BTreeSet<T>) -> bool {
546602
// Same result as self.difference(other).next().is_none()
547-
// but the 3 paths below are faster (in order: hugely, 20%, 5%).
603+
// but the code below is faster (hugely in some cases).
548604
if self.len() > other.len() {
549-
false
550-
} else if self.len() > other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
551-
// Self is not much smaller than other set.
552-
// Stolen from TreeMap
553-
let mut x = self.iter();
554-
let mut y = other.iter();
555-
let mut a = x.next();
556-
let mut b = y.next();
557-
while a.is_some() {
558-
if b.is_none() {
605+
return false;
606+
}
607+
let (self_min, self_max) = if let (Some(self_min), Some(self_max)) =
608+
(self.iter().next(), self.iter().next_back())
609+
{
610+
(self_min, self_max)
611+
} else {
612+
return true; // self is empty
613+
};
614+
let (other_min, other_max) = if let (Some(other_min), Some(other_max)) =
615+
(other.iter().next(), other.iter().next_back())
616+
{
617+
(other_min, other_max)
618+
} else {
619+
return false; // other is empty
620+
};
621+
let mut self_iter = self.iter();
622+
match self_min.cmp(other_min) {
623+
Less => return false,
624+
Equal => {
625+
self_iter.next();
626+
}
627+
Greater => (),
628+
}
629+
match self_max.cmp(other_max) {
630+
Greater => return false,
631+
Equal => {
632+
self_iter.next_back();
633+
}
634+
Less => (),
635+
}
636+
if self_iter.len() <= other.len() / ITER_PERFORMANCE_TIPPING_SIZE_DIFF {
637+
// Big difference in number of elements.
638+
for next in self_iter {
639+
if !other.contains(next) {
559640
return false;
560641
}
561-
562-
let a1 = a.unwrap();
563-
let b1 = b.unwrap();
564-
565-
match b1.cmp(a1) {
566-
Less => (),
567-
Greater => return false,
568-
Equal => a = x.next(),
569-
}
570-
571-
b = y.next();
572642
}
573-
true
574643
} else {
575-
// Big difference in number of elements, or both sets are empty.
576-
// Iterate the small set, searching for matches in the large set.
577-
for next in self {
578-
if !other.contains(next) {
579-
return false;
644+
// Self is not much smaller than other set.
645+
let mut other_iter = other.iter();
646+
other_iter.next();
647+
other_iter.next_back();
648+
let mut self_next = self_iter.next();
649+
while let Some(self1) = self_next {
650+
match other_iter.next().map_or(Less, |other1| self1.cmp(other1)) {
651+
Less => return false,
652+
Equal => self_next = self_iter.next(),
653+
Greater => (),
580654
}
581655
}
582-
true
583656
}
657+
true
584658
}
585659

586660
/// Returns `true` if the set is a superset of another,
@@ -1120,6 +1194,7 @@ impl<T> Clone for Difference<'_, T> {
11201194
self_iter: self_iter.clone(),
11211195
other_set,
11221196
},
1197+
DifferenceInner::Iterate(iter) => DifferenceInner::Iterate(iter.clone()),
11231198
},
11241199
}
11251200
}
@@ -1138,7 +1213,7 @@ impl<'a, T: Ord> Iterator for Difference<'a, T> {
11381213
loop {
11391214
match other_iter
11401215
.peek()
1141-
.map_or(Less, |other_next| Ord::cmp(self_next, other_next))
1216+
.map_or(Less, |other_next| self_next.cmp(other_next))
11421217
{
11431218
Less => return Some(self_next),
11441219
Equal => {
@@ -1160,19 +1235,21 @@ impl<'a, T: Ord> Iterator for Difference<'a, T> {
11601235
return Some(self_next);
11611236
}
11621237
},
1238+
DifferenceInner::Iterate(iter) => iter.next(),
11631239
}
11641240
}
11651241

11661242
fn size_hint(&self) -> (usize, Option<usize>) {
11671243
let (self_len, other_len) = match &self.inner {
11681244
DifferenceInner::Stitch {
11691245
self_iter,
1170-
other_iter
1246+
other_iter,
11711247
} => (self_iter.len(), other_iter.len()),
11721248
DifferenceInner::Search {
11731249
self_iter,
1174-
other_set
1250+
other_set,
11751251
} => (self_iter.len(), other_set.len()),
1252+
DifferenceInner::Iterate(iter) => (iter.len(), 0),
11761253
};
11771254
(self_len.saturating_sub(other_len), Some(self_len))
11781255
}
@@ -1234,6 +1311,7 @@ impl<T> Clone for Intersection<'_, T> {
12341311
small_iter: small_iter.clone(),
12351312
large_set,
12361313
},
1314+
IntersectionInner::Answer(answer) => IntersectionInner::Answer(answer.clone()),
12371315
},
12381316
}
12391317
}
@@ -1251,7 +1329,7 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> {
12511329
let mut a_next = a.next()?;
12521330
let mut b_next = b.next()?;
12531331
loop {
1254-
match Ord::cmp(a_next, b_next) {
1332+
match a_next.cmp(b_next) {
12551333
Less => a_next = a.next()?,
12561334
Greater => b_next = b.next()?,
12571335
Equal => return Some(a_next),
@@ -1267,15 +1345,17 @@ impl<'a, T: Ord> Iterator for Intersection<'a, T> {
12671345
return Some(small_next);
12681346
}
12691347
},
1348+
IntersectionInner::Answer(answer) => answer.take(),
12701349
}
12711350
}
12721351

12731352
fn size_hint(&self) -> (usize, Option<usize>) {
1274-
let min_len = match &self.inner {
1275-
IntersectionInner::Stitch { a, b } => min(a.len(), b.len()),
1276-
IntersectionInner::Search { small_iter, .. } => small_iter.len(),
1277-
};
1278-
(0, Some(min_len))
1353+
match &self.inner {
1354+
IntersectionInner::Stitch { a, b } => (0, Some(min(a.len(), b.len()))),
1355+
IntersectionInner::Search { small_iter, .. } => (0, Some(small_iter.len())),
1356+
IntersectionInner::Answer(None) => (0, Some(0)),
1357+
IntersectionInner::Answer(Some(_)) => (1, Some(1)),
1358+
}
12791359
}
12801360
}
12811361

0 commit comments

Comments
 (0)