Skip to content

Commit 2cec687

Browse files
committed
Auto merge of #98004 - paolobarbolini:vecdeque-extend-trustedlen, r=the8472
Add VecDeque::extend from TrustedLen specialization Continuation of #95904 Inspired by how [`VecDeque::copy_slice` works](https://github.com/rust-lang/rust/blob/c08b235a5ce10167632bb0fddcd0c5d67f2d42e3/library/alloc/src/collections/vec_deque/mod.rs#L437-L454). ## Benchmarks Before ``` test vec_deque::bench_extend_chained_bytes ... bench: 1,026 ns/iter (+/- 17) test vec_deque::bench_extend_chained_trustedlen ... bench: 1,024 ns/iter (+/- 40) test vec_deque::bench_extend_trustedlen ... bench: 637 ns/iter (+/- 693) ``` After ``` test vec_deque::bench_extend_chained_bytes ... bench: 828 ns/iter (+/- 24) test vec_deque::bench_extend_chained_trustedlen ... bench: 25 ns/iter (+/- 1) test vec_deque::bench_extend_trustedlen ... bench: 21 ns/iter (+/- 0) ``` ## Why do it this way https://rust.godbolt.org/z/15qY1fMYh The Compiler Explorer example shows how "just" removing the capacity check, like the [`Vec` `TrustedLen` specialization](https://github.com/rust-lang/rust/blob/c08b235a5ce10167632bb0fddcd0c5d67f2d42e3/library/alloc/src/vec/spec_extend.rs#L22-L58) does, wouldn't have been enough for `VecDeque`. `wrap_add` would still have greatly limited what LLVM could do while optimizing. --- r? `@the8472`
2 parents cdcc53b + ce3b6f5 commit 2cec687

File tree

8 files changed

+218
-3
lines changed

8 files changed

+218
-3
lines changed

library/alloc/benches/vec_deque.rs

+32
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,35 @@ fn bench_extend_vec(b: &mut Bencher) {
9191
ring.extend(black_box(input));
9292
});
9393
}
94+
95+
#[bench]
96+
fn bench_extend_trustedlen(b: &mut Bencher) {
97+
let mut ring: VecDeque<u16> = VecDeque::with_capacity(1000);
98+
99+
b.iter(|| {
100+
ring.clear();
101+
ring.extend(black_box(0..512));
102+
});
103+
}
104+
105+
#[bench]
106+
fn bench_extend_chained_trustedlen(b: &mut Bencher) {
107+
let mut ring: VecDeque<u16> = VecDeque::with_capacity(1000);
108+
109+
b.iter(|| {
110+
ring.clear();
111+
ring.extend(black_box((0..256).chain(768..1024)));
112+
});
113+
}
114+
115+
#[bench]
116+
fn bench_extend_chained_bytes(b: &mut Bencher) {
117+
let mut ring: VecDeque<u16> = VecDeque::with_capacity(1000);
118+
let input1: &[u16] = &[128; 256];
119+
let input2: &[u16] = &[255; 256];
120+
121+
b.iter(|| {
122+
ring.clear();
123+
ring.extend(black_box(input1.iter().chain(input2.iter())));
124+
});
125+
}

library/alloc/src/collections/vec_deque/mod.rs

+19
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,25 @@ impl<T, A: Allocator> VecDeque<T, A> {
453453
}
454454
}
455455

456+
/// Writes all values from `iter` to `dst`.
457+
///
458+
/// # Safety
459+
///
460+
/// Assumes no wrapping around happens.
461+
/// Assumes capacity is sufficient.
462+
#[inline]
463+
unsafe fn write_iter(
464+
&mut self,
465+
dst: usize,
466+
iter: impl Iterator<Item = T>,
467+
written: &mut usize,
468+
) {
469+
iter.enumerate().for_each(|(i, element)| unsafe {
470+
self.buffer_write(dst + i, element);
471+
*written += 1;
472+
});
473+
}
474+
456475
/// Frobs the head and tail sections around to handle the fact that we
457476
/// just reallocated. Unsafe because it trusts old_capacity.
458477
#[inline]

library/alloc/src/collections/vec_deque/spec_extend.rs

+59
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::alloc::Allocator;
22
use crate::vec;
3+
use core::iter::{ByRefSized, TrustedLen};
34
use core::slice;
45

56
use super::VecDeque;
@@ -34,6 +35,64 @@ where
3435
}
3536
}
3637

38+
impl<T, I, A: Allocator> SpecExtend<T, I> for VecDeque<T, A>
39+
where
40+
I: TrustedLen<Item = T>,
41+
{
42+
default fn spec_extend(&mut self, mut iter: I) {
43+
// This is the case for a TrustedLen iterator.
44+
let (low, high) = iter.size_hint();
45+
if let Some(additional) = high {
46+
debug_assert_eq!(
47+
low,
48+
additional,
49+
"TrustedLen iterator's size hint is not exact: {:?}",
50+
(low, high)
51+
);
52+
self.reserve(additional);
53+
54+
struct WrapAddOnDrop<'a, T, A: Allocator> {
55+
vec_deque: &'a mut VecDeque<T, A>,
56+
written: usize,
57+
}
58+
59+
impl<'a, T, A: Allocator> Drop for WrapAddOnDrop<'a, T, A> {
60+
fn drop(&mut self) {
61+
self.vec_deque.head =
62+
self.vec_deque.wrap_add(self.vec_deque.head, self.written);
63+
}
64+
}
65+
66+
let mut wrapper = WrapAddOnDrop { vec_deque: self, written: 0 };
67+
68+
let head_room = wrapper.vec_deque.cap() - wrapper.vec_deque.head;
69+
unsafe {
70+
wrapper.vec_deque.write_iter(
71+
wrapper.vec_deque.head,
72+
ByRefSized(&mut iter).take(head_room),
73+
&mut wrapper.written,
74+
);
75+
76+
if additional > head_room {
77+
wrapper.vec_deque.write_iter(0, iter, &mut wrapper.written);
78+
}
79+
}
80+
81+
debug_assert_eq!(
82+
additional, wrapper.written,
83+
"The number of items written to VecDeque doesn't match the TrustedLen size hint"
84+
);
85+
} else {
86+
// Per TrustedLen contract a `None` upper bound means that the iterator length
87+
// truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway.
88+
// Since the other branch already panics eagerly (via `reserve()`) we do the same here.
89+
// This avoids additional codegen for a fallback code path which would eventually
90+
// panic anyway.
91+
panic!("capacity overflow");
92+
}
93+
}
94+
}
95+
3796
impl<T, A: Allocator> SpecExtend<T, vec::IntoIter<T>> for VecDeque<T, A> {
3897
fn spec_extend(&mut self, mut iterator: vec::IntoIter<T>) {
3998
let slice = iterator.as_slice();

library/alloc/src/collections/vec_deque/tests.rs

+97
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use core::iter::TrustedLen;
2+
13
use super::*;
24

35
#[bench]
@@ -791,6 +793,101 @@ fn test_from_vec() {
791793
assert_eq!(vd.len(), vec.len());
792794
}
793795

796+
#[test]
797+
fn test_extend_basic() {
798+
test_extend_impl(false);
799+
}
800+
801+
#[test]
802+
fn test_extend_trusted_len() {
803+
test_extend_impl(true);
804+
}
805+
806+
fn test_extend_impl(trusted_len: bool) {
807+
struct VecDequeTester {
808+
test: VecDeque<usize>,
809+
expected: VecDeque<usize>,
810+
trusted_len: bool,
811+
}
812+
813+
impl VecDequeTester {
814+
fn new(trusted_len: bool) -> Self {
815+
Self { test: VecDeque::new(), expected: VecDeque::new(), trusted_len }
816+
}
817+
818+
fn test_extend<I>(&mut self, iter: I)
819+
where
820+
I: Iterator<Item = usize> + TrustedLen + Clone,
821+
{
822+
struct BasicIterator<I>(I);
823+
impl<I> Iterator for BasicIterator<I>
824+
where
825+
I: Iterator<Item = usize>,
826+
{
827+
type Item = usize;
828+
829+
fn next(&mut self) -> Option<Self::Item> {
830+
self.0.next()
831+
}
832+
}
833+
834+
if self.trusted_len {
835+
self.test.extend(iter.clone());
836+
} else {
837+
self.test.extend(BasicIterator(iter.clone()));
838+
}
839+
840+
for item in iter {
841+
self.expected.push_back(item)
842+
}
843+
844+
assert_eq!(self.test, self.expected);
845+
let (a1, b1) = self.test.as_slices();
846+
let (a2, b2) = self.expected.as_slices();
847+
assert_eq!(a1, a2);
848+
assert_eq!(b1, b2);
849+
}
850+
851+
fn drain<R: RangeBounds<usize> + Clone>(&mut self, range: R) {
852+
self.test.drain(range.clone());
853+
self.expected.drain(range);
854+
855+
assert_eq!(self.test, self.expected);
856+
}
857+
858+
fn clear(&mut self) {
859+
self.test.clear();
860+
self.expected.clear();
861+
}
862+
863+
fn remaining_capacity(&self) -> usize {
864+
self.test.capacity() - self.test.len()
865+
}
866+
}
867+
868+
let mut tester = VecDequeTester::new(trusted_len);
869+
870+
// Initial capacity
871+
tester.test_extend(0..tester.remaining_capacity() - 1);
872+
873+
// Grow
874+
tester.test_extend(1024..2048);
875+
876+
// Wrap around
877+
tester.drain(..128);
878+
879+
tester.test_extend(0..tester.remaining_capacity() - 1);
880+
881+
// Continue
882+
tester.drain(256..);
883+
tester.test_extend(4096..8196);
884+
885+
tester.clear();
886+
887+
// Start again
888+
tester.test_extend(0..32);
889+
}
890+
794891
#[test]
795892
#[should_panic = "capacity overflow"]
796893
fn test_from_vec_zst_overflow() {

library/alloc/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@
143143
#![feature(unchecked_math)]
144144
#![feature(unicode_internals)]
145145
#![feature(unsize)]
146+
#![feature(std_internals)]
146147
//
147148
// Language features:
148149
#![feature(allocator_internals)]

library/core/src/iter/adapters/by_ref_sized.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@ use crate::ops::Try;
44
///
55
/// Ideally this will no longer be required, eventually, but as can be seen in
66
/// the benchmarks (as of Feb 2022 at least) `by_ref` can have performance cost.
7-
pub(crate) struct ByRefSized<'a, I>(pub &'a mut I);
7+
#[unstable(feature = "std_internals", issue = "none")]
8+
#[derive(Debug)]
9+
pub struct ByRefSized<'a, I>(pub &'a mut I);
810

11+
#[unstable(feature = "std_internals", issue = "none")]
912
impl<I: Iterator> Iterator for ByRefSized<'_, I> {
1013
type Item = I::Item;
1114

@@ -47,6 +50,7 @@ impl<I: Iterator> Iterator for ByRefSized<'_, I> {
4750
}
4851
}
4952

53+
#[unstable(feature = "std_internals", issue = "none")]
5054
impl<I: DoubleEndedIterator> DoubleEndedIterator for ByRefSized<'_, I> {
5155
#[inline]
5256
fn next_back(&mut self) -> Option<Self::Item> {

library/core/src/iter/adapters/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ pub use self::{
3232
scan::Scan, skip::Skip, skip_while::SkipWhile, take::Take, take_while::TakeWhile, zip::Zip,
3333
};
3434

35-
pub(crate) use self::by_ref_sized::ByRefSized;
35+
#[unstable(feature = "std_internals", issue = "none")]
36+
pub use self::by_ref_sized::ByRefSized;
3637

3738
#[stable(feature = "iter_cloned", since = "1.1.0")]
3839
pub use self::cloned::Cloned;

library/core/src/iter/mod.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,8 @@ pub use self::traits::{
398398

399399
#[stable(feature = "iter_zip", since = "1.59.0")]
400400
pub use self::adapters::zip;
401+
#[unstable(feature = "std_internals", issue = "none")]
402+
pub use self::adapters::ByRefSized;
401403
#[stable(feature = "iter_cloned", since = "1.1.0")]
402404
pub use self::adapters::Cloned;
403405
#[stable(feature = "iter_copied", since = "1.36.0")]
@@ -422,7 +424,7 @@ pub use self::adapters::{
422424
#[unstable(feature = "iter_intersperse", reason = "recently added", issue = "79524")]
423425
pub use self::adapters::{Intersperse, IntersperseWith};
424426

425-
pub(crate) use self::adapters::{try_process, ByRefSized};
427+
pub(crate) use self::adapters::try_process;
426428

427429
mod adapters;
428430
mod range;

0 commit comments

Comments
 (0)