Skip to content

Commit 4d50558

Browse files
amir73ilgregkh
authored andcommitted
vfs: fix copy_file_range() regression in cross-fs copies
commit 868f9f2 upstream. A regression has been reported by Nicolas Boichat, found while using the copy_file_range syscall to copy a tracefs file. Before commit 5dae222 ("vfs: allow copy_file_range to copy across devices") the kernel would return -EXDEV to userspace when trying to copy a file across different filesystems. After this commit, the syscall doesn't fail anymore and instead returns zero (zero bytes copied), as this file's content is generated on-the-fly and thus reports a size of zero. Another regression has been reported by He Zhe - the assertion of WARN_ON_ONCE(ret == -EOPNOTSUPP) can be triggered from userspace when copying from a sysfs file whose read operation may return -EOPNOTSUPP. Since we do not have test coverage for copy_file_range() between any two types of filesystems, the best way to avoid these sort of issues in the future is for the kernel to be more picky about filesystems that are allowed to do copy_file_range(). This patch restores some cross-filesystem copy restrictions that existed prior to commit 5dae222 ("vfs: allow copy_file_range to copy across devices"), namely, cross-sb copy is not allowed for filesystems that do not implement ->copy_file_range(). Filesystems that do implement ->copy_file_range() have full control of the result - if this method returns an error, the error is returned to the user. Before this change this was only true for fs that did not implement the ->remap_file_range() operation (i.e. nfsv3). Filesystems that do not implement ->copy_file_range() still fall-back to the generic_copy_file_range() implementation when the copy is within the same sb. This helps the kernel can maintain a more consistent story about which filesystems support copy_file_range(). nfsd and ksmbd servers are modified to fall-back to the generic_copy_file_range() implementation in case vfs_copy_file_range() fails with -EOPNOTSUPP or -EXDEV, which preserves behavior of server-side-copy. fall-back to generic_copy_file_range() is not implemented for the smb operation FSCTL_DUPLICATE_EXTENTS_TO_FILE, which is arguably a correct change of behavior. Fixes: 5dae222 ("vfs: allow copy_file_range to copy across devices") Link: https://lore.kernel.org/linux-fsdevel/[email protected]/ Link: https://lore.kernel.org/linux-fsdevel/CANMq1KDZuxir2LM5jOTm0xx+BnvW=ZmpsG47CyHFJwnw7zSX6Q@mail.gmail.com/ Link: https://lore.kernel.org/linux-fsdevel/20210126135012.1.If45b7cdc3ff707bc1efa17f5366057d60603c45f@changeid/ Link: https://lore.kernel.org/linux-fsdevel/[email protected]/ Reported-by: Nicolas Boichat <[email protected]> Reported-by: kernel test robot <[email protected]> Signed-off-by: Luis Henriques <[email protected]> Fixes: 64bf5ff ("vfs: no fallback for ->copy_file_range") Link: https://lore.kernel.org/linux-fsdevel/[email protected]/ Reported-by: He Zhe <[email protected]> Tested-by: Namjae Jeon <[email protected]> Tested-by: Luis Henriques <[email protected]> Signed-off-by: Amir Goldstein <[email protected]> Signed-off-by: Linus Torvalds <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 115d941 commit 4d50558

File tree

4 files changed

+68
-37
lines changed

4 files changed

+68
-37
lines changed

fs/ksmbd/smb2pdu.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7794,14 +7794,24 @@ int smb2_ioctl(struct ksmbd_work *work)
77947794
src_off = le64_to_cpu(dup_ext->SourceFileOffset);
77957795
dst_off = le64_to_cpu(dup_ext->TargetFileOffset);
77967796
length = le64_to_cpu(dup_ext->ByteCount);
7797-
cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp,
7798-
dst_off, length, 0);
7797+
/*
7798+
* XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE
7799+
* should fall back to vfs_copy_file_range(). This could be
7800+
* beneficial when re-exporting nfs/smb mount, but note that
7801+
* this can result in partial copy that returns an error status.
7802+
* If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented,
7803+
* fall back to vfs_copy_file_range(), should be avoided when
7804+
* the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set.
7805+
*/
7806+
cloned = vfs_clone_file_range(fp_in->filp, src_off,
7807+
fp_out->filp, dst_off, length, 0);
77997808
if (cloned == -EXDEV || cloned == -EOPNOTSUPP) {
78007809
ret = -EOPNOTSUPP;
78017810
goto dup_ext_out;
78027811
} else if (cloned != length) {
78037812
cloned = vfs_copy_file_range(fp_in->filp, src_off,
7804-
fp_out->filp, dst_off, length, 0);
7813+
fp_out->filp, dst_off,
7814+
length, 0);
78057815
if (cloned != length) {
78067816
if (cloned < 0)
78077817
ret = cloned;

fs/ksmbd/vfs.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
17821782

17831783
ret = vfs_copy_file_range(src_fp->filp, src_off,
17841784
dst_fp->filp, dst_off, len, 0);
1785+
if (ret == -EOPNOTSUPP || ret == -EXDEV)
1786+
ret = generic_copy_file_range(src_fp->filp, src_off,
1787+
dst_fp->filp, dst_off,
1788+
len, 0);
17851789
if (ret < 0)
17861790
return ret;
17871791

fs/nfsd/vfs.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,7 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
560560
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
561561
u64 dst_pos, u64 count)
562562
{
563+
ssize_t ret;
563564

564565
/*
565566
* Limit copy to 4MB to prevent indefinitely blocking an nfsd
@@ -570,7 +571,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
570571
* limit like this and pipeline multiple COPY requests.
571572
*/
572573
count = min_t(u64, count, 1 << 22);
573-
return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
574+
ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
575+
576+
if (ret == -EOPNOTSUPP || ret == -EXDEV)
577+
ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
578+
count, 0);
579+
return ret;
574580
}
575581

576582
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,

fs/read_write.c

Lines changed: 44 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1384,28 +1384,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
13841384
}
13851385
EXPORT_SYMBOL(generic_copy_file_range);
13861386

1387-
static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
1388-
struct file *file_out, loff_t pos_out,
1389-
size_t len, unsigned int flags)
1390-
{
1391-
/*
1392-
* Although we now allow filesystems to handle cross sb copy, passing
1393-
* a file of the wrong filesystem type to filesystem driver can result
1394-
* in an attempt to dereference the wrong type of ->private_data, so
1395-
* avoid doing that until we really have a good reason. NFS defines
1396-
* several different file_system_type structures, but they all end up
1397-
* using the same ->copy_file_range() function pointer.
1398-
*/
1399-
if (file_out->f_op->copy_file_range &&
1400-
file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
1401-
return file_out->f_op->copy_file_range(file_in, pos_in,
1402-
file_out, pos_out,
1403-
len, flags);
1404-
1405-
return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1406-
flags);
1407-
}
1408-
14091387
/*
14101388
* Performs necessary checks before doing a file copy
14111389
*
@@ -1427,6 +1405,24 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
14271405
if (ret)
14281406
return ret;
14291407

1408+
/*
1409+
* We allow some filesystems to handle cross sb copy, but passing
1410+
* a file of the wrong filesystem type to filesystem driver can result
1411+
* in an attempt to dereference the wrong type of ->private_data, so
1412+
* avoid doing that until we really have a good reason.
1413+
*
1414+
* nfs and cifs define several different file_system_type structures
1415+
* and several different sets of file_operations, but they all end up
1416+
* using the same ->copy_file_range() function pointer.
1417+
*/
1418+
if (file_out->f_op->copy_file_range) {
1419+
if (file_in->f_op->copy_file_range !=
1420+
file_out->f_op->copy_file_range)
1421+
return -EXDEV;
1422+
} else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) {
1423+
return -EXDEV;
1424+
}
1425+
14301426
/* Don't touch certain kinds of inodes */
14311427
if (IS_IMMUTABLE(inode_out))
14321428
return -EPERM;
@@ -1492,26 +1488,41 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
14921488
file_start_write(file_out);
14931489

14941490
/*
1495-
* Try cloning first, this is supported by more file systems, and
1496-
* more efficient if both clone and copy are supported (e.g. NFS).
1491+
* Cloning is supported by more file systems, so we implement copy on
1492+
* same sb using clone, but for filesystems where both clone and copy
1493+
* are supported (e.g. nfs,cifs), we only call the copy method.
14971494
*/
1495+
if (file_out->f_op->copy_file_range) {
1496+
ret = file_out->f_op->copy_file_range(file_in, pos_in,
1497+
file_out, pos_out,
1498+
len, flags);
1499+
goto done;
1500+
}
1501+
14981502
if (file_in->f_op->remap_file_range &&
14991503
file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
1500-
loff_t cloned;
1501-
1502-
cloned = file_in->f_op->remap_file_range(file_in, pos_in,
1504+
ret = file_in->f_op->remap_file_range(file_in, pos_in,
15031505
file_out, pos_out,
15041506
min_t(loff_t, MAX_RW_COUNT, len),
15051507
REMAP_FILE_CAN_SHORTEN);
1506-
if (cloned > 0) {
1507-
ret = cloned;
1508+
if (ret > 0)
15081509
goto done;
1509-
}
15101510
}
15111511

1512-
ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1513-
flags);
1514-
WARN_ON_ONCE(ret == -EOPNOTSUPP);
1512+
/*
1513+
* We can get here for same sb copy of filesystems that do not implement
1514+
* ->copy_file_range() in case filesystem does not support clone or in
1515+
* case filesystem supports clone but rejected the clone request (e.g.
1516+
* because it was not block aligned).
1517+
*
1518+
* In both cases, fall back to kernel copy so we are able to maintain a
1519+
* consistent story about which filesystems support copy_file_range()
1520+
* and which filesystems do not, that will allow userspace tools to
1521+
* make consistent desicions w.r.t using copy_file_range().
1522+
*/
1523+
ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
1524+
flags);
1525+
15151526
done:
15161527
if (ret > 0) {
15171528
fsnotify_access(file_in);

0 commit comments

Comments
 (0)