Skip to content

Commit b32e6e0

Browse files
authored
Rollup merge of rust-lang#81136 - Xavientois:io_reader_size_hint, r=cramertj
Improved IO Bytes Size Hint. After trying to implement better `size_hint()` return values for `File` in [this PR](rust-lang#81044) and then switching to implementing it for `BufReader` in [this PR](rust-lang#81052), I have arrived at this implementation, which provides tighter bounds for the `Bytes` iterator of various readers, including `BufReader`, `Empty`, and `Chain`. Unfortunately, for `BufReader`, the size hint only improves after calling `fill_buf`, because the hint is derived from the current contents of the buffer. Nevertheless, the tighter bounds should result in better pre-allocation of space to handle the contents of the `Bytes` iterator. Closes rust-lang#81052
2 parents 161d9f0 + 7674ae1 commit b32e6e0

File tree

4 files changed

+103
-3
lines changed

4 files changed

+103
-3
lines changed

library/std/src/io/buffered/bufreader.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use crate::cmp;
22
use crate::fmt;
3-
use crate::io::{self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, DEFAULT_BUF_SIZE};
3+
use crate::io::{
4+
self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
5+
};
46

57
/// The `BufReader<R>` struct adds buffering to any reader.
68
///
@@ -435,3 +437,9 @@ impl<R: Seek> Seek for BufReader<R> {
435437
})
436438
}
437439
}
440+
441+
impl<T> SizeHint for BufReader<T> {
442+
fn lower_bound(&self) -> usize {
443+
self.buffer().len()
444+
}
445+
}

library/std/src/io/mod.rs

+37
Original file line numberDiff line numberDiff line change
@@ -2238,6 +2238,19 @@ impl<T: BufRead, U: BufRead> BufRead for Chain<T, U> {
22382238
}
22392239
}
22402240

2241+
impl<T, U> SizeHint for Chain<T, U> {
2242+
fn lower_bound(&self) -> usize {
2243+
SizeHint::lower_bound(&self.first) + SizeHint::lower_bound(&self.second)
2244+
}
2245+
2246+
fn upper_bound(&self) -> Option<usize> {
2247+
match (SizeHint::upper_bound(&self.first), SizeHint::upper_bound(&self.second)) {
2248+
(Some(first), Some(second)) => Some(first + second),
2249+
_ => None,
2250+
}
2251+
}
2252+
}
2253+
22412254
/// Reader adaptor which limits the bytes read from an underlying reader.
22422255
///
22432256
/// This struct is generally created by calling [`take`] on a reader.
@@ -2464,6 +2477,30 @@ impl<R: Read> Iterator for Bytes<R> {
24642477
};
24652478
}
24662479
}
2480+
2481+
fn size_hint(&self) -> (usize, Option<usize>) {
2482+
SizeHint::size_hint(&self.inner)
2483+
}
2484+
}
2485+
2486+
trait SizeHint {
2487+
fn lower_bound(&self) -> usize;
2488+
2489+
fn upper_bound(&self) -> Option<usize>;
2490+
2491+
fn size_hint(&self) -> (usize, Option<usize>) {
2492+
(self.lower_bound(), self.upper_bound())
2493+
}
2494+
}
2495+
2496+
impl<T> SizeHint for T {
2497+
default fn lower_bound(&self) -> usize {
2498+
0
2499+
}
2500+
2501+
default fn upper_bound(&self) -> Option<usize> {
2502+
None
2503+
}
24672504
}
24682505

24692506
/// An iterator over the contents of an instance of `BufRead` split on a

library/std/src/io/tests.rs

+48-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use super::{repeat, Cursor, SeekFrom};
22
use crate::cmp::{self, min};
33
use crate::io::{self, IoSlice, IoSliceMut};
4-
use crate::io::{BufRead, Read, Seek, Write};
4+
use crate::io::{BufRead, BufReader, Read, Seek, Write};
55
use crate::ops::Deref;
66

77
#[test]
@@ -198,6 +198,53 @@ fn chain_bufread() {
198198
cmp_bufread(chain1, chain2, &testdata[..]);
199199
}
200200

201+
#[test]
202+
fn bufreader_size_hint() {
203+
let testdata = b"ABCDEFGHIJKL";
204+
let mut buf_reader = BufReader::new(&testdata[..]);
205+
assert_eq!(buf_reader.buffer().len(), 0);
206+
207+
let buffer_length = testdata.len();
208+
buf_reader.fill_buf().unwrap();
209+
210+
// Check that size hint matches buffer contents
211+
let mut buffered_bytes = buf_reader.bytes();
212+
let (lower_bound, _upper_bound) = buffered_bytes.size_hint();
213+
assert_eq!(lower_bound, buffer_length);
214+
215+
// Check that size hint matches buffer contents after advancing
216+
buffered_bytes.next().unwrap().unwrap();
217+
let (lower_bound, _upper_bound) = buffered_bytes.size_hint();
218+
assert_eq!(lower_bound, buffer_length - 1);
219+
}
220+
221+
#[test]
222+
fn empty_size_hint() {
223+
let size_hint = io::empty().bytes().size_hint();
224+
assert_eq!(size_hint, (0, Some(0)));
225+
}
226+
227+
#[test]
228+
fn chain_empty_size_hint() {
229+
let chain = io::empty().chain(io::empty());
230+
let size_hint = chain.bytes().size_hint();
231+
assert_eq!(size_hint, (0, Some(0)));
232+
}
233+
234+
#[test]
235+
fn chain_size_hint() {
236+
let testdata = b"ABCDEFGHIJKL";
237+
let mut buf_reader_1 = BufReader::new(&testdata[..6]);
238+
let mut buf_reader_2 = BufReader::new(&testdata[6..]);
239+
240+
buf_reader_1.fill_buf().unwrap();
241+
buf_reader_2.fill_buf().unwrap();
242+
243+
let chain = buf_reader_1.chain(buf_reader_2);
244+
let size_hint = chain.bytes().size_hint();
245+
assert_eq!(size_hint, (testdata.len(), None));
246+
}
247+
201248
#[test]
202249
fn chain_zero_length_read_is_not_eof() {
203250
let a = b"A";

library/std/src/io/util.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
mod tests;
55

66
use crate::fmt;
7-
use crate::io::{self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
7+
use crate::io::{
8+
self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, SizeHint, Write,
9+
};
810

911
/// A reader which is always at EOF.
1012
///
@@ -80,6 +82,12 @@ impl fmt::Debug for Empty {
8082
}
8183
}
8284

85+
impl SizeHint for Empty {
86+
fn upper_bound(&self) -> Option<usize> {
87+
Some(0)
88+
}
89+
}
90+
8391
/// A reader which yields one byte over and over and over and over and over and...
8492
///
8593
/// This struct is generally created by calling [`repeat()`]. Please

0 commit comments

Comments
 (0)