Skip to content

Commit 4105506

Browse files
committed
specialize io::copy to use the memory of the writer if it is a BufWriter
1 parent 0e63af5 commit 4105506

File tree

4 files changed

+138
-8
lines changed

4 files changed

+138
-8
lines changed

library/std/src/io/buffered/bufwriter.rs

+13-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ impl<W: Write> BufWriter<W> {
117117
/// "successfully written" (by returning nonzero success values from
118118
/// `write`), any 0-length writes from `inner` must be reported as i/o
119119
/// errors from this method.
120-
pub(super) fn flush_buf(&mut self) -> io::Result<()> {
120+
pub(in crate::io) fn flush_buf(&mut self) -> io::Result<()> {
121121
/// Helper struct to ensure the buffer is updated after all the writes
122122
/// are complete. It tracks the number of written bytes and drains them
123123
/// all from the front of the buffer when dropped.
@@ -243,6 +243,18 @@ impl<W: Write> BufWriter<W> {
243243
&self.buf
244244
}
245245

246+
/// Returns a mutable reference to the internal buffer.
247+
///
248+
/// This can be used to write data directly into the buffer without triggering writes
249+
/// to the underlying writer.
250+
///
251+
/// That the buffer is a `Vec` is an implementation detail.
252+
/// Callers should not modify the capacity as there currently is no public API to do so
253+
/// and thus any capacity changes would be unexpected by the user.
254+
pub(in crate::io) fn buffer_mut(&mut self) -> &mut Vec<u8> {
255+
&mut self.buf
256+
}
257+
246258
/// Returns the number of bytes the internal buffer can hold without flushing.
247259
///
248260
/// # Examples

library/std/src/io/copy.rs

+74-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::io::{self, ErrorKind, Read, Write};
1+
use super::{BufWriter, ErrorKind, Read, Result, Write, DEFAULT_BUF_SIZE};
22
use crate::mem::MaybeUninit;
33

44
/// Copies the entire contents of a reader into a writer.
@@ -40,7 +40,7 @@ use crate::mem::MaybeUninit;
4040
/// }
4141
/// ```
4242
#[stable(feature = "rust1", since = "1.0.0")]
43-
pub fn copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
43+
pub fn copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> Result<u64>
4444
where
4545
R: Read,
4646
W: Write,
@@ -54,14 +54,82 @@ where
5454
}
5555
}
5656

57-
/// The general read-write-loop implementation of
58-
/// `io::copy` that is used when specializations are not available or not applicable.
59-
pub(crate) fn generic_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> io::Result<u64>
57+
/// The userspace read-write-loop implementation of `io::copy` that is used when
58+
/// OS-specific specializations for copy offloading are not available or not applicable.
59+
pub(crate) fn generic_copy<R: ?Sized, W: ?Sized>(reader: &mut R, writer: &mut W) -> Result<u64>
6060
where
6161
R: Read,
6262
W: Write,
6363
{
64-
let mut buf = MaybeUninit::<[u8; super::DEFAULT_BUF_SIZE]>::uninit();
64+
BufferedCopySpec::copy_to(reader, writer)
65+
}
66+
67+
/// Specialization of the read-write loop that either uses a stack buffer
68+
/// or reuses the internal buffer of a `BufWriter`.
69+
trait BufferedCopySpec: Write {
70+
fn copy_to<R: Read + ?Sized>(reader: &mut R, writer: &mut Self) -> Result<u64>;
71+
}
72+
73+
impl<W: Write + ?Sized> BufferedCopySpec for W {
74+
default fn copy_to<R: Read + ?Sized>(reader: &mut R, writer: &mut Self) -> Result<u64> {
75+
stack_buffer_copy(reader, writer)
76+
}
77+
}
78+
79+
impl<I: Write> BufferedCopySpec for BufWriter<I> {
80+
fn copy_to<R: Read + ?Sized>(reader: &mut R, writer: &mut Self) -> Result<u64> {
81+
if writer.capacity() < DEFAULT_BUF_SIZE {
82+
return stack_buffer_copy(reader, writer);
83+
}
84+
85+
// FIXME: #42788
86+
//
87+
// - This creates a (mut) reference to a slice of
88+
// _uninitialized_ integers, which is **undefined behavior**
89+
//
90+
// - Only the standard library gets to soundly "ignore" this,
91+
// based on its privileged knowledge of unstable rustc
92+
// internals;
93+
unsafe {
94+
let spare_cap = writer.buffer_mut().spare_capacity_mut();
95+
reader.initializer().initialize(MaybeUninit::slice_assume_init_mut(spare_cap));
96+
}
97+
98+
let mut len = 0;
99+
100+
loop {
101+
let buf = writer.buffer_mut();
102+
let spare_cap = buf.spare_capacity_mut();
103+
104+
if spare_cap.len() >= DEFAULT_BUF_SIZE {
105+
match reader.read(unsafe { MaybeUninit::slice_assume_init_mut(spare_cap) }) {
106+
Ok(0) => return Ok(len), // EOF reached
107+
Ok(bytes_read) => {
108+
assert!(bytes_read <= spare_cap.len());
109+
// Safety: The initializer contract guarantees that either it or `read`
110+
// will have initialized these bytes. And we just checked that the number
111+
// of bytes is within the buffer capacity.
112+
unsafe { buf.set_len(buf.len() + bytes_read) };
113+
len += bytes_read as u64;
114+
// Read again if the buffer still has enough capacity, as BufWriter itself would do.
115+
// This will occur if the reader returns short reads
116+
continue;
117+
}
118+
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
119+
Err(e) => return Err(e),
120+
}
121+
}
122+
123+
writer.flush_buf()?;
124+
}
125+
}
126+
}
127+
128+
fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
129+
reader: &mut R,
130+
writer: &mut W,
131+
) -> Result<u64> {
132+
let mut buf = MaybeUninit::<[u8; DEFAULT_BUF_SIZE]>::uninit();
65133
// FIXME: #42788
66134
//
67135
// - This creates a (mut) reference to a slice of

library/std/src/io/util/tests.rs

+49-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
use crate::cmp::{max, min};
12
use crate::io::prelude::*;
2-
use crate::io::{copy, empty, repeat, sink, Empty, Repeat, SeekFrom, Sink};
3+
use crate::io::{
4+
copy, empty, repeat, sink, BufWriter, Empty, Repeat, Result, SeekFrom, Sink, DEFAULT_BUF_SIZE,
5+
};
36

47
#[test]
58
fn copy_copies() {
@@ -11,6 +14,51 @@ fn copy_copies() {
1114
assert_eq!(copy(&mut r as &mut dyn Read, &mut w as &mut dyn Write).unwrap(), 1 << 17);
1215
}
1316

17+
struct ShortReader {
18+
cap: usize,
19+
read_size: usize,
20+
observed_buffer: usize,
21+
}
22+
23+
impl Read for ShortReader {
24+
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
25+
let bytes = min(self.cap, self.read_size);
26+
self.cap -= bytes;
27+
self.observed_buffer = max(self.observed_buffer, buf.len());
28+
Ok(bytes)
29+
}
30+
}
31+
32+
struct WriteObserver {
33+
observed_buffer: usize,
34+
}
35+
36+
impl Write for WriteObserver {
37+
fn write(&mut self, buf: &[u8]) -> Result<usize> {
38+
self.observed_buffer = max(self.observed_buffer, buf.len());
39+
Ok(buf.len())
40+
}
41+
42+
fn flush(&mut self) -> Result<()> {
43+
Ok(())
44+
}
45+
}
46+
47+
#[test]
48+
fn copy_specializes_bufwriter() {
49+
let cap = 117 * 1024;
50+
let buf_sz = 16 * 1024;
51+
let mut r = ShortReader { cap, observed_buffer: 0, read_size: 1337 };
52+
let mut w = BufWriter::with_capacity(buf_sz, WriteObserver { observed_buffer: 0 });
53+
assert_eq!(
54+
copy(&mut r, &mut w).unwrap(),
55+
cap as u64,
56+
"expected the whole capacity to be copied"
57+
);
58+
assert_eq!(r.observed_buffer, buf_sz, "expected a large buffer to be provided to the reader");
59+
assert!(w.get_mut().observed_buffer > DEFAULT_BUF_SIZE, "expected coalesced writes");
60+
}
61+
1462
#[test]
1563
fn sink_sinks() {
1664
let mut s = sink();

library/std/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@
283283
#![feature(maybe_uninit_extra)]
284284
#![feature(maybe_uninit_ref)]
285285
#![feature(maybe_uninit_slice)]
286+
#![feature(maybe_uninit_uninit_array)]
286287
#![feature(min_specialization)]
287288
#![feature(needs_panic_runtime)]
288289
#![feature(negative_impls)]
@@ -326,6 +327,7 @@
326327
#![feature(unsafe_cell_raw_get)]
327328
#![feature(unwind_attributes)]
328329
#![feature(vec_into_raw_parts)]
330+
#![feature(vec_spare_capacity)]
329331
#![feature(wake_trait)]
330332
// NB: the above list is sorted to minimize merge conflicts.
331333
#![default_lib_allocator]

0 commit comments

Comments
 (0)