Skip to content

Commit d25de31

Browse files
committed
Auto merge of #89165 - jkugelman:read-to-end-overallocation, r=joshtriplett
Fix read_to_end to not grow an exact size buffer If you know how much data to expect and use `Vec::with_capacity` to pre-allocate a buffer of that capacity, `Read::read_to_end` will still double its capacity. It needs some space to perform a read, even though that read ends up returning `0`. It's a bummer to carefully pre-allocate 1GB to read a 1GB file into memory and end up using 2GB. This fixes that behavior by special casing a full buffer and reading into a small "probe" buffer instead. If that read returns `0` then it's confirmed that the buffer was the perfect size. If it doesn't, the probe buffer is appended to the normal buffer and the read loop continues. Fixing this allows several workarounds in the standard library to be removed: - `Take` no longer needs to override `Read::read_to_end`. - The `reservation_size` callback that allowed `Take` to inhibit the previous over-allocation behavior isn't needed. - `fs::read` doesn't need to reserve an extra byte in `initial_buffer_size`. Curiously, there was a unit test that specifically checked that `Read::read_to_end` *does* over-allocate. I removed that test, too.
2 parents e737694 + 9b9c24e commit d25de31

File tree

3 files changed

+37
-41
lines changed

3 files changed

+37
-41
lines changed

library/std/src/fs.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -200,10 +200,9 @@ pub struct DirBuilder {
200200

201201
/// Indicates how large a buffer to pre-allocate before reading the entire file.
202202
fn initial_buffer_size(file: &File) -> usize {
203-
// Allocate one extra byte so the buffer doesn't need to grow before the
204-
// final `read` call at the end of the file. Don't worry about `usize`
205-
// overflow because reading will fail regardless in that case.
206-
file.metadata().map(|m| m.len() as usize + 1).unwrap_or(0)
203+
// Don't worry about `usize` overflow because reading will fail regardless
204+
// in that case.
205+
file.metadata().map(|m| m.len() as usize).unwrap_or(0)
207206
}
208207

209208
/// Read the entire contents of a file into a bytes vector.

library/std/src/io/mod.rs

+31-22
Original file line numberDiff line numberDiff line change
@@ -362,22 +362,18 @@ where
362362
// Because we're extending the buffer with uninitialized data for trusted
363363
// readers, we need to make sure to truncate that if any of this panics.
364364
fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
365-
read_to_end_with_reservation(r, buf, |_| 32)
366-
}
367-
368-
fn read_to_end_with_reservation<R, F>(
369-
r: &mut R,
370-
buf: &mut Vec<u8>,
371-
mut reservation_size: F,
372-
) -> Result<usize>
373-
where
374-
R: Read + ?Sized,
375-
F: FnMut(&R) -> usize,
376-
{
377365
let start_len = buf.len();
366+
let start_cap = buf.capacity();
378367
let mut g = Guard { len: buf.len(), buf };
379368
loop {
380-
if g.len == g.buf.len() {
369+
// If we've read all the way up to the capacity, reserve more space.
370+
if g.len == g.buf.capacity() {
371+
g.buf.reserve(32);
372+
}
373+
374+
// Initialize any excess capacity and adjust the length so we can write
375+
// to it.
376+
if g.buf.len() < g.buf.capacity() {
381377
unsafe {
382378
// FIXME(danielhenrymantilla): #42788
383379
//
@@ -387,7 +383,6 @@ where
387383
// - Only the standard library gets to soundly "ignore" this,
388384
// based on its privileged knowledge of unstable rustc
389385
// internals;
390-
g.buf.reserve(reservation_size(r));
391386
let capacity = g.buf.capacity();
392387
g.buf.set_len(capacity);
393388
r.initializer().initialize(&mut g.buf[g.len..]);
@@ -404,9 +399,30 @@ where
404399
assert!(n <= buf.len());
405400
g.len += n;
406401
}
407-
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
402+
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
408403
Err(e) => return Err(e),
409404
}
405+
406+
if g.len == g.buf.capacity() && g.buf.capacity() == start_cap {
407+
// The buffer might be an exact fit. Let's read into a probe buffer
408+
// and see if it returns `Ok(0)`. If so, we've avoided an
409+
// unnecessary doubling of the capacity. But if not, append the
410+
// probe buffer to the primary buffer and let its capacity grow.
411+
let mut probe = [0u8; 32];
412+
413+
loop {
414+
match r.read(&mut probe) {
415+
Ok(0) => return Ok(g.len - start_len),
416+
Ok(n) => {
417+
g.buf.extend_from_slice(&probe[..n]);
418+
g.len += n;
419+
break;
420+
}
421+
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
422+
Err(e) => return Err(e),
423+
}
424+
}
425+
}
410426
}
411427
}
412428

@@ -2584,13 +2600,6 @@ impl<T: Read> Read for Take<T> {
25842600
unsafe fn initializer(&self) -> Initializer {
25852601
self.inner.initializer()
25862602
}
2587-
2588-
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
2589-
// Pass in a reservation_size closure that respects the current value
2590-
// of limit for each read. If we hit the read limit, this prevents the
2591-
// final zero-byte read from allocating again.
2592-
read_to_end_with_reservation(self, buf, |self_| cmp::min(self_.limit, 32) as usize)
2593-
}
25942603
}
25952604

25962605
#[stable(feature = "rust1", since = "1.0.0")]

library/std/src/io/tests.rs

+3-15
Original file line numberDiff line numberDiff line change
@@ -362,24 +362,12 @@ impl<'a> Read for ExampleSliceReader<'a> {
362362
fn test_read_to_end_capacity() -> io::Result<()> {
363363
let input = &b"foo"[..];
364364

365-
// read_to_end() generally needs to over-allocate, both for efficiency
366-
// and so that it can distinguish EOF. Assert that this is the case
367-
// with this simple ExampleSliceReader struct, which uses the default
368-
// implementation of read_to_end. Even though vec1 is allocated with
369-
// exactly enough capacity for the read, read_to_end will allocate more
370-
// space here.
365+
// read_to_end() takes care not to over-allocate when a buffer is the
366+
// exact size needed.
371367
let mut vec1 = Vec::with_capacity(input.len());
372368
ExampleSliceReader { slice: input }.read_to_end(&mut vec1)?;
373369
assert_eq!(vec1.len(), input.len());
374-
assert!(vec1.capacity() > input.len(), "allocated more");
375-
376-
// However, std::io::Take includes an implementation of read_to_end
377-
// that will not allocate when the limit has already been reached. In
378-
// this case, vec2 never grows.
379-
let mut vec2 = Vec::with_capacity(input.len());
380-
ExampleSliceReader { slice: input }.take(input.len() as u64).read_to_end(&mut vec2)?;
381-
assert_eq!(vec2.len(), input.len());
382-
assert_eq!(vec2.capacity(), input.len(), "did not allocate more");
370+
assert_eq!(vec1.capacity(), input.len(), "did not allocate more");
383371

384372
Ok(())
385373
}

0 commit comments

Comments
 (0)