Skip to content

Commit e413d89

Browse files
committed
Auto merge of #79274 - the8472:probe-eperm, r=nagisa
Implement better availability probing for copy_file_range. Followup to #75428 (comment). Previously, syscall detection was overly pessimistic: any attempt to copy to an immutable file (EPERM) would disable copy_file_range support for the whole process. This change probes copy_file_range with invalid file descriptors, which will never run into the immutable-file case, and thus we can clearly distinguish syscall availability.
2 parents 58d2bad + 7647d03 commit e413d89

File tree

1 file changed

+50
-36
lines changed

1 file changed

+50
-36
lines changed

library/std/src/sys/unix/kernel_copy.rs

+50-36
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ use crate::os::unix::io::{AsRawFd, FromRawFd, RawFd};
5959
use crate::os::unix::net::UnixStream;
6060
use crate::process::{ChildStderr, ChildStdin, ChildStdout};
6161
use crate::ptr;
62-
use crate::sync::atomic::{AtomicBool, Ordering};
62+
use crate::sync::atomic::{AtomicBool, AtomicU8, Ordering};
6363
use crate::sys::cvt;
6464

6565
#[cfg(test)]
@@ -491,18 +491,29 @@ impl CopyResult {
491491
}
492492
}
493493

494-
/// linux-specific implementation that will attempt to use copy_file_range for copy offloading
495-
/// as the name says, it only works on regular files
494+
/// Invalid file descriptor.
495+
///
496+
/// Valid file descriptors are guaranteed to be non-negative numbers (see `open()` manpage)
497+
/// while negative values are used to indicate errors.
498+
/// Thus -1 will never overlap with a valid open file descriptor.
499+
const INVALID_FD: RawFd = -1;
500+
501+
/// Linux-specific implementation that will attempt to use copy_file_range for copy offloading.
502+
/// As the name says, it only works on regular files.
496503
///
497504
/// Callers must handle fallback to a generic copy loop.
498505
/// `Fallback` may indicate a non-zero number of bytes already written
499506
/// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`).
500507
pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> CopyResult {
501508
use crate::cmp;
502509

510+
const NOT_PROBED: u8 = 0;
511+
const UNAVAILABLE: u8 = 1;
512+
const AVAILABLE: u8 = 2;
513+
503514
// Kernels prior to 4.5 don't have copy_file_range
504515
// We store the availability in a global to avoid unnecessary syscalls
505-
static HAS_COPY_FILE_RANGE: AtomicBool = AtomicBool::new(true);
516+
static HAS_COPY_FILE_RANGE: AtomicU8 = AtomicU8::new(NOT_PROBED);
506517

507518
syscall! {
508519
fn copy_file_range(
@@ -515,39 +526,39 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) ->
515526
) -> libc::ssize_t
516527
}
517528

518-
let has_copy_file_range = HAS_COPY_FILE_RANGE.load(Ordering::Relaxed);
519-
let mut written = 0u64;
520-
while written < max_len {
521-
let copy_result = if has_copy_file_range {
522-
let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64);
523-
// cap to 1GB chunks in case u64::MAX is passed as max_len and the file has a non-zero seek position
524-
// this allows us to copy large chunks without hitting EOVERFLOW,
525-
// unless someone sets a file offset close to u64::MAX - 1GB, in which case a fallback would be required
526-
let bytes_to_copy = cmp::min(bytes_to_copy as usize, 0x4000_0000usize);
527-
let copy_result = unsafe {
528-
// We actually don't have to adjust the offsets,
529-
// because copy_file_range adjusts the file offset automatically
530-
cvt(copy_file_range(
531-
reader,
532-
ptr::null_mut(),
533-
writer,
534-
ptr::null_mut(),
535-
bytes_to_copy,
536-
0,
537-
))
529+
match HAS_COPY_FILE_RANGE.load(Ordering::Relaxed) {
530+
NOT_PROBED => {
531+
// EPERM can indicate seccomp filters or an immutable file.
532+
// To distinguish these cases we probe with invalid file descriptors which should result in EBADF if the syscall is supported
533+
// and some other error (ENOSYS or EPERM) if it's not available
534+
let result = unsafe {
535+
cvt(copy_file_range(INVALID_FD, ptr::null_mut(), INVALID_FD, ptr::null_mut(), 1, 0))
538536
};
539-
if let Err(ref copy_err) = copy_result {
540-
match copy_err.raw_os_error() {
541-
Some(libc::ENOSYS | libc::EPERM | libc::EOPNOTSUPP) => {
542-
HAS_COPY_FILE_RANGE.store(false, Ordering::Relaxed);
543-
}
544-
_ => {}
545-
}
537+
538+
if matches!(result.map_err(|e| e.raw_os_error()), Err(Some(libc::EBADF))) {
539+
HAS_COPY_FILE_RANGE.store(AVAILABLE, Ordering::Relaxed);
540+
} else {
541+
HAS_COPY_FILE_RANGE.store(UNAVAILABLE, Ordering::Relaxed);
542+
return CopyResult::Fallback(0);
546543
}
547-
copy_result
548-
} else {
549-
Err(Error::from_raw_os_error(libc::ENOSYS))
544+
}
545+
UNAVAILABLE => return CopyResult::Fallback(0),
546+
_ => {}
547+
};
548+
549+
let mut written = 0u64;
550+
while written < max_len {
551+
let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64);
552+
// cap to 1GB chunks in case u64::MAX is passed as max_len and the file has a non-zero seek position
553+
// this allows us to copy large chunks without hitting EOVERFLOW,
554+
// unless someone sets a file offset close to u64::MAX - 1GB, in which case a fallback would be required
555+
let bytes_to_copy = cmp::min(bytes_to_copy as usize, 0x4000_0000usize);
556+
let copy_result = unsafe {
557+
// We actually don't have to adjust the offsets,
558+
// because copy_file_range adjusts the file offset automatically
559+
cvt(copy_file_range(reader, ptr::null_mut(), writer, ptr::null_mut(), bytes_to_copy, 0))
550560
};
561+
551562
match copy_result {
552563
Ok(0) if written == 0 => {
553564
// fallback to work around several kernel bugs where copy_file_range will fail to
@@ -567,11 +578,14 @@ pub(super) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) ->
567578
libc::ENOSYS | libc::EXDEV | libc::EINVAL | libc::EPERM | libc::EOPNOTSUPP,
568579
) => {
569580
// Try fallback io::copy if either:
570-
// - Kernel version is < 4.5 (ENOSYS)
581+
// - Kernel version is < 4.5 (ENOSYS¹)
571582
// - Files are mounted on different fs (EXDEV)
572583
// - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
573-
// - copy_file_range is disallowed, for example by seccomp (EPERM)
584+
// - the target file is immutable or the syscall is blocked by seccomp¹ (EPERM)
574585
// - copy_file_range cannot be used with pipes or device nodes (EINVAL)
586+
//
587+
// ¹ these cases should be detected by the initial probe but we handle them here
588+
// anyway in case syscall interception changes during runtime
575589
assert_eq!(written, 0);
576590
CopyResult::Fallback(0)
577591
}

0 commit comments

Comments
 (0)