Rollup merge of #113493 - the8472:spec-iocopy-slice, r=Mark-Simulacrum

additional io::copy specializations

- copying from `&[u8]` and `VecDeque<u8>`
- copying to `Vec<u8>`

A user on reddit [mentioned they saw a performance drop](https://www.reddit.com/r/rust/comments/14shv9f/comment/jr0bg6j/?context=3) when copying from a slice.
This commit is contained in:
Matthias Krüger 2023-07-09 16:33:37 +02:00 committed by GitHub
commit 205ae163e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 116 additions and 3 deletions

View File

@ -1,4 +1,8 @@
use super::{BorrowedBuf, BufReader, BufWriter, ErrorKind, Read, Result, Write, DEFAULT_BUF_SIZE}; use super::{BorrowedBuf, BufReader, BufWriter, ErrorKind, Read, Result, Write, DEFAULT_BUF_SIZE};
use crate::alloc::Allocator;
use crate::cmp;
use crate::collections::VecDeque;
use crate::io::IoSlice;
use crate::mem::MaybeUninit; use crate::mem::MaybeUninit;
#[cfg(test)] #[cfg(test)]
@ -86,7 +90,7 @@ where
/// Specialization of the read-write loop that reuses the internal /// Specialization of the read-write loop that reuses the internal
/// buffer of a BufReader. If there's no buffer then the writer side /// buffer of a BufReader. If there's no buffer then the writer side
/// should be used intead. /// should be used instead.
trait BufferedReaderSpec { trait BufferedReaderSpec {
fn buffer_size(&self) -> usize; fn buffer_size(&self) -> usize;
@ -104,7 +108,39 @@ where
} }
default fn copy_to(&mut self, _to: &mut (impl Write + ?Sized)) -> Result<u64> { default fn copy_to(&mut self, _to: &mut (impl Write + ?Sized)) -> Result<u64> {
unimplemented!("only called from specializations"); unreachable!("only called from specializations")
}
}
/// `io::copy` source specialization for byte slices: the slice itself is the
/// read buffer, so its whole content is handed to the writer directly.
impl BufferedReaderSpec for &[u8] {
    fn buffer_size(&self) -> usize {
        // Report the largest possible size so that this specialization is
        // preferred: the slice already holds everything there is to copy,
        // however short it may be.
        usize::MAX
    }

    /// Writes the entire slice to `to` and then advances `self` to the empty
    /// tail. Returns the number of bytes written. If the write fails the
    /// slice is left unchanged.
    fn copy_to(&mut self, to: &mut (impl Write + ?Sized)) -> Result<u64> {
        // Splitting at the full length yields the whole payload plus the
        // empty remainder that becomes the new reader state.
        let (payload, rest) = self.split_at(self.len());
        to.write_all(payload)?;
        *self = rest;
        Ok(payload.len() as u64)
    }
}
/// `io::copy` source specialization for `VecDeque<u8, A>`: the deque's own
/// storage is used as the read buffer instead of an intermediate copy buffer.
impl<A: Allocator> BufferedReaderSpec for VecDeque<u8, A> {
/// Always reports `usize::MAX` so that this source-side path is preferred.
fn buffer_size(&self) -> usize {
// prefer this specialization since the source "buffer" is all we'll ever need,
// even if it's small
usize::MAX
}
/// Writes the full contents of the deque to `to` and then empties the deque.
///
/// Returns the number of bytes the deque held before the write. If the write
/// fails, the error is propagated via `?` and `clear` is not reached.
fn copy_to(&mut self, to: &mut (impl Write + ?Sized)) -> Result<u64> {
// Record the length up front; the deque is cleared before returning.
let len = self.len();
// The deque's contents are exposed as two slices (front, back) ...
let (front, back) = self.as_slices();
// ... which are handed to the writer together as one vectored write.
let bufs = &mut [IoSlice::new(front), IoSlice::new(back)];
to.write_all_vectored(bufs)?;
// Everything was written, so the source is now drained.
self.clear();
Ok(len as u64)
} }
} }
@ -218,6 +254,47 @@ impl<I: Write + ?Sized> BufferedWriterSpec for BufWriter<I> {
} }
} }
/// `io::copy` destination specialization for `Vec<u8, A>`: data is read
/// straight into the vector's spare capacity rather than through a separate
/// intermediate buffer.
impl<A: Allocator> BufferedWriterSpec for Vec<u8, A> {
/// Reports the larger of `DEFAULT_BUF_SIZE` and the vector's current spare
/// capacity (`capacity() - len()`).
fn buffer_size(&self) -> usize {
cmp::max(DEFAULT_BUF_SIZE, self.capacity() - self.len())
}
/// Reads from `reader` until a read yields zero bytes, appending everything
/// to `self`, and returns the total number of bytes appended.
///
/// Reads that fail with `ErrorKind::Interrupted` are retried; any other
/// error is returned immediately (bytes already appended stay in `self`).
fn copy_from<R: Read + ?Sized>(&mut self, reader: &mut R) -> Result<u64> {
// Running total of bytes copied so far.
let mut bytes = 0;
// avoid allocating before we have determined that there's anything to read
if self.capacity() == 0 {
// Probe with at most DEFAULT_BUF_SIZE bytes (enforced by `take`) via
// `stack_buffer_copy` — presumably a copy loop over a stack-allocated
// buffer; its body is not fully visible here, confirm against its definition.
bytes = stack_buffer_copy(&mut reader.take(DEFAULT_BUF_SIZE as u64), self)?;
if bytes == 0 {
// Nothing was copied by the probe: return without ever calling `reserve`.
return Ok(0);
}
}
loop {
// Make sure at least DEFAULT_BUF_SIZE bytes of spare capacity exist,
// then expose that (uninitialized) spare capacity as a BorrowedBuf.
self.reserve(DEFAULT_BUF_SIZE);
let mut buf: BorrowedBuf<'_> = self.spare_capacity_mut().into();
match reader.read_buf(buf.unfilled()) {
Ok(()) => {}
// An interrupted read is retried from the top of the loop.
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
};
// Number of bytes this read placed into the spare capacity.
let read = buf.filled().len();
if read == 0 {
// A read that fills nothing ends the copy.
break;
}
// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
// and the number of read bytes can't exceed the spare capacity since
// that's what the buffer is borrowing from.
unsafe { self.set_len(self.len() + read) };
bytes += read as u64;
}
Ok(bytes)
}
}
fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>( fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
reader: &mut R, reader: &mut R,
writer: &mut W, writer: &mut W,

View File

@ -1,4 +1,6 @@
use crate::cmp::{max, min}; use crate::cmp::{max, min};
use crate::collections::VecDeque;
use crate::io;
use crate::io::*; use crate::io::*;
#[test] #[test]
@ -19,7 +21,7 @@ struct ShortReader {
impl Read for ShortReader { impl Read for ShortReader {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> { fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
let bytes = min(self.cap, self.read_size); let bytes = min(self.cap, self.read_size).min(buf.len());
self.cap -= bytes; self.cap -= bytes;
self.observed_buffer = max(self.observed_buffer, buf.len()); self.observed_buffer = max(self.observed_buffer, buf.len());
Ok(bytes) Ok(bytes)
@ -78,6 +80,40 @@ fn copy_specializes_bufreader() {
); );
} }
/// Copying into a `Vec` must transfer every byte and must offer the reader a
/// buffer larger than `DEFAULT_BUF_SIZE`.
#[test]
fn copy_specializes_to_vec() {
    let total = 123456;
    let mut reader = ShortReader { cap: total, observed_buffer: 0, read_size: 1337 };
    let mut dest = Vec::new();

    let expected = total as u64;
    let copied = io::copy(&mut reader, &mut dest).unwrap();
    assert_eq!(expected, copied);

    // The reader records the largest buffer it was ever handed.
    let largest_seen = reader.observed_buffer;
    assert!(
        largest_seen > DEFAULT_BUF_SIZE,
        "expected a large buffer to be provided to the reader"
    );
}
/// Copying out of a `VecDeque` filled from both ends must transfer all bytes
/// and present the writer with a buffer as large as one half of the deque.
#[test]
fn copy_specializes_from_vecdeque() {
    const HALF: usize = 20 * 1024;

    let mut deque = VecDeque::with_capacity(100 * 1024);
    // Fill from the front first, then from the back, so the contents are
    // built up from both ends of the ring buffer (order matters here).
    (0..HALF).for_each(|_| deque.push_front(0));
    (0..HALF).for_each(|_| deque.push_back(0));

    let mut observer = WriteObserver { observed_buffer: 0 };
    let copied = io::copy(&mut deque, &mut observer).unwrap();

    assert_eq!(40 * 1024u64, copied);
    assert_eq!(20 * 1024, observer.observed_buffer);
}
/// Copying out of a byte slice must transfer all bytes and present the writer
/// with the whole slice as a single buffer.
#[test]
fn copy_specializes_from_slice() {
    const LEN: usize = 60 * 1024;

    let backing = [1; LEN];
    let mut input = backing.as_slice();
    let mut observer = WriteObserver { observed_buffer: 0 };

    let copied = io::copy(&mut input, &mut observer).unwrap();

    assert_eq!(60 * 1024u64, copied);
    assert_eq!(60 * 1024, observer.observed_buffer);
}
#[cfg(unix)] #[cfg(unix)]
mod io_benches { mod io_benches {
use crate::fs::File; use crate::fs::File;