Skip to content

Commit 7bcb7f0

Browse files
authored
File incorrectly zeroed when receiving incremental stream that toggles -L
Background:

By increasing the recordsize property above the default of 128KB, a filesystem may have "large" blocks. By default, a send stream of such a filesystem does not contain large WRITE records, instead it decreases objects' block sizes to 128KB and splits the large blocks into 128KB blocks, allowing the large-block filesystem to be received by a system that does not support the `large_blocks` feature. A send stream generated by `zfs send -L` (or `--large-block`) preserves the large block size on the receiving system, by using large WRITE records.

When receiving an incremental send stream for a filesystem with large blocks, if the send stream's -L flag was toggled, a bug is encountered in which the file's contents are incorrectly zeroed out. The contents of any blocks that were not modified by this send stream will be lost. "Toggled" means that the previous send used `-L`, but this incremental does not use `-L` (-L to no-L); or that the previous send did not use `-L`, but this incremental does use `-L` (no-L to -L).

Changes:

This commit addresses the problem with several changes to the semantics of zfs send/receive:

1. "-L to no-L" incrementals are rejected. If the previous send used `-L`, but this incremental does not use `-L`, the `zfs receive` will fail with this error message:

    incremental send stream requires -L (--large-block), to match previous receive.

2. "no-L to -L" incrementals are handled correctly, preserving the smaller (128KB) block size of any already-received files that used large blocks on the sending system but were split by `zfs send` without the `-L` flag.

3. A new send stream format flag is added, `SWITCH_TO_LARGE_BLOCKS`. This feature indicates that we can correctly handle "no-L to -L" incrementals. This flag is currently not set on any send streams. In the future, we intend for incremental send streams of snapshots that have large blocks to use `-L` by default, and these streams will also have the `SWITCH_TO_LARGE_BLOCKS` feature set. This ensures that streams from the default use of `zfs send` won't encounter the bug mentioned above, because they can't be received by software with the bug.

Implementation notes:

To facilitate accessing the ZPL's generation number, `zfs_space_delta_cb()` has been renamed to `zpl_get_file_info()` and restructured to fill in a struct with ZPL-specific info including owner and generation.

In the "no-L to -L" case, if this is a compressed send stream (from `zfs send -cL`), large WRITE records that are being written to small (128KB) blocksize files need to be decompressed so that they can be written split up into multiple blocks. The zio pipeline will recompress each smaller block individually.

A new test case, `send-L_toggle`, is added, which tests the "no-L to -L" case and verifies that we get an error for the "-L to no-L" case.

Reviewed-by: Paul Dagnelie <[email protected]>
Reviewed-by: Brian Behlendorf <[email protected]>
Signed-off-by: Matthew Ahrens <[email protected]>
Closes #6224
Closes #10383
1 parent 6722be2 commit 7bcb7f0

File tree

15 files changed

+500
-165
lines changed

15 files changed

+500
-165
lines changed

cmd/zhack/zhack.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ fatal(spa_t *spa, void *tag, const char *fmt, ...)
103103

104104
/* ARGSUSED */
105105
static int
106-
space_delta_cb(dmu_object_type_t bonustype, void *data,
107-
uint64_t *userp, uint64_t *groupp, uint64_t *projectp)
106+
space_delta_cb(dmu_object_type_t bonustype, const void *data,
107+
zfs_file_info_t *zoi)
108108
{
109109
/*
110110
* Is it a valid type of object to track?

include/sys/dmu.h

+10-3
Original file line numberDiff line numberDiff line change
@@ -1013,10 +1013,17 @@ extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
10131013
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
10141014
uint64_t *idp, uint64_t *offp);
10151015

1016-
typedef int objset_used_cb_t(dmu_object_type_t bonustype,
1017-
void *bonus, uint64_t *userp, uint64_t *groupp, uint64_t *projectp);
1016+
typedef struct zfs_file_info {
1017+
uint64_t zfi_user;
1018+
uint64_t zfi_group;
1019+
uint64_t zfi_project;
1020+
uint64_t zfi_generation;
1021+
} zfs_file_info_t;
1022+
1023+
typedef int file_info_cb_t(dmu_object_type_t bonustype, const void *data,
1024+
struct zfs_file_info *zoi);
10181025
extern void dmu_objset_register_type(dmu_objset_type_t ost,
1019-
objset_used_cb_t *cb);
1026+
file_info_cb_t *cb);
10201027
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
10211028
extern void *dmu_objset_get_user(objset_t *os);
10221029

include/sys/dmu_objset.h

+2
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ boolean_t dmu_objset_projectquota_enabled(objset_t *os);
254254
boolean_t dmu_objset_projectquota_present(objset_t *os);
255255
boolean_t dmu_objset_projectquota_upgradable(objset_t *os);
256256
void dmu_objset_id_quota_upgrade(objset_t *os);
257+
int dmu_get_file_info(objset_t *os, dmu_object_type_t bonustype,
258+
const void *data, zfs_file_info_t *zfi);
257259

258260
int dmu_fsname(const char *snapname, char *buf);
259261

include/sys/fs/zfs.h

+1
Original file line numberDiff line numberDiff line change
@@ -1336,6 +1336,7 @@ typedef enum {
13361336
ZFS_ERR_EXPORT_IN_PROGRESS,
13371337
ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR,
13381338
ZFS_ERR_STREAM_TRUNCATED,
1339+
ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH,
13391340
} zfs_errno_t;
13401341

13411342
/*

include/sys/zfs_ioctl.h

+17-1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,22 @@ typedef enum drr_headertype {
107107
#define DMU_BACKUP_FEATURE_RAW (1 << 24)
108108
/* flag #25 is reserved for the ZSTD compression feature */
109109
#define DMU_BACKUP_FEATURE_HOLDS (1 << 26)
110+
/*
111+
* The SWITCH_TO_LARGE_BLOCKS feature indicates that we can receive
112+
* incremental LARGE_BLOCKS streams (those with WRITE records of >128KB) even
113+
* if the previous send did not use LARGE_BLOCKS, and thus its large blocks
114+
* were split into multiple 128KB WRITE records. (See
115+
* flush_write_batch_impl() and receive_object()). Older software that does
116+
* not support this flag may encounter a bug when switching to large blocks,
117+
* which causes files to incorrectly be zeroed.
118+
*
119+
* This flag is currently not set on any send streams. In the future, we
120+
* intend for incremental send streams of snapshots that have large blocks to
121+
* use LARGE_BLOCKS by default, and these streams will also have the
122+
* SWITCH_TO_LARGE_BLOCKS feature set. This ensures that streams from the
123+
* default use of "zfs send" won't encounter the bug mentioned above.
124+
*/
125+
#define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
110126

111127
/*
112128
* Mask of all supported backup features
@@ -116,7 +132,7 @@ typedef enum drr_headertype {
116132
DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
117133
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
118134
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
119-
DMU_BACKUP_FEATURE_REDACTED)
135+
DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS)
120136

121137
/* Are all features in the given flag word currently supported? */
122138
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))

include/sys/zfs_quota.h

+14-15
Original file line numberDiff line numberDiff line change
@@ -24,23 +24,22 @@
2424

2525
#include <sys/dmu.h>
2626
#include <sys/fs/zfs.h>
27-
#include <sys/zfs_vfsops.h>
2827

29-
extern int zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
30-
uint64_t *userp, uint64_t *groupp, uint64_t *projectp);
28+
struct zfsvfs;
29+
struct zfs_file_info_t;
3130

32-
extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
33-
const char *domain, uint64_t rid, uint64_t *valuep);
34-
extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
35-
uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
36-
extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
37-
const char *domain, uint64_t rid, uint64_t quota);
31+
extern int zpl_get_file_info(dmu_object_type_t,
32+
const void *, struct zfs_file_info *);
3833

39-
extern boolean_t zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
40-
uint64_t id);
41-
extern boolean_t zfs_id_overblockquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
42-
uint64_t id);
43-
extern boolean_t zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
44-
uint64_t id);
34+
extern int zfs_userspace_one(struct zfsvfs *, zfs_userquota_prop_t,
35+
const char *, uint64_t, uint64_t *);
36+
extern int zfs_userspace_many(struct zfsvfs *, zfs_userquota_prop_t,
37+
uint64_t *, void *, uint64_t *);
38+
extern int zfs_set_userquota(struct zfsvfs *, zfs_userquota_prop_t,
39+
const char *, uint64_t, uint64_t);
40+
41+
extern boolean_t zfs_id_overobjquota(struct zfsvfs *, uint64_t, uint64_t);
42+
extern boolean_t zfs_id_overblockquota(struct zfsvfs *, uint64_t, uint64_t);
43+
extern boolean_t zfs_id_overquota(struct zfsvfs *, uint64_t, uint64_t);
4544

4645
#endif

lib/libzfs/libzfs_sendrecv.c

+6
Original file line numberDiff line numberDiff line change
@@ -4802,6 +4802,12 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
48024802
ioctl_err == ECKSUM);
48034803
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
48044804
break;
4805+
case ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH:
4806+
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4807+
"incremental send stream requires -L "
4808+
"(--large-block), to match previous receive."));
4809+
(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4810+
break;
48054811
case ENOTSUP:
48064812
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
48074813
"pool must be upgraded to receive this stream."));

module/os/freebsd/zfs/zfs_vfsops.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -2202,7 +2202,7 @@ zfs_init(void)
22022202
*/
22032203
zfs_vnodes_adjust();
22042204

2205-
dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
2205+
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
22062206

22072207
zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
22082208
}

module/os/linux/zfs/zfs_vfsops.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -2131,7 +2131,7 @@ zfs_init(void)
21312131
{
21322132
zfsctl_init();
21332133
zfs_znode_init();
2134-
dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
2134+
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
21352135
register_filesystem(&zpl_fs_type);
21362136
}
21372137

module/zfs/dmu_objset.c

+28-21
Original file line numberDiff line numberDiff line change
@@ -1728,19 +1728,29 @@ dmu_objset_is_dirty(objset_t *os, uint64_t txg)
17281728
return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK]));
17291729
}
17301730

1731-
static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
1731+
static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES];
17321732

17331733
void
1734-
dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
1734+
dmu_objset_register_type(dmu_objset_type_t ost, file_info_cb_t *cb)
17351735
{
1736-
used_cbs[ost] = cb;
1736+
file_cbs[ost] = cb;
1737+
}
1738+
1739+
int
1740+
dmu_get_file_info(objset_t *os, dmu_object_type_t bonustype, const void *data,
1741+
zfs_file_info_t *zfi)
1742+
{
1743+
file_info_cb_t *cb = file_cbs[os->os_phys->os_type];
1744+
if (cb == NULL)
1745+
return (EINVAL);
1746+
return (cb(bonustype, data, zfi));
17371747
}
17381748

17391749
boolean_t
17401750
dmu_objset_userused_enabled(objset_t *os)
17411751
{
17421752
return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
1743-
used_cbs[os->os_phys->os_type] != NULL &&
1753+
file_cbs[os->os_phys->os_type] != NULL &&
17441754
DMU_USERUSED_DNODE(os) != NULL);
17451755
}
17461756

@@ -1754,7 +1764,7 @@ dmu_objset_userobjused_enabled(objset_t *os)
17541764
boolean_t
17551765
dmu_objset_projectquota_enabled(objset_t *os)
17561766
{
1757-
return (used_cbs[os->os_phys->os_type] != NULL &&
1767+
return (file_cbs[os->os_phys->os_type] != NULL &&
17581768
DMU_PROJECTUSED_DNODE(os) != NULL &&
17591769
spa_feature_is_enabled(os->os_spa, SPA_FEATURE_PROJECT_QUOTA));
17601770
}
@@ -2089,9 +2099,6 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
20892099
objset_t *os = dn->dn_objset;
20902100
void *data = NULL;
20912101
dmu_buf_impl_t *db = NULL;
2092-
uint64_t *user = NULL;
2093-
uint64_t *group = NULL;
2094-
uint64_t *project = NULL;
20952102
int flags = dn->dn_id_flags;
20962103
int error;
20972104
boolean_t have_spill = B_FALSE;
@@ -2145,23 +2152,23 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
21452152
return;
21462153
}
21472154

2148-
if (before) {
2149-
ASSERT(data);
2150-
user = &dn->dn_olduid;
2151-
group = &dn->dn_oldgid;
2152-
project = &dn->dn_oldprojid;
2153-
} else if (data) {
2154-
user = &dn->dn_newuid;
2155-
group = &dn->dn_newgid;
2156-
project = &dn->dn_newprojid;
2157-
}
2158-
21592155
/*
21602156
* Must always call the callback in case the object
21612157
* type has changed and that type isn't an object type to track
21622158
*/
2163-
error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
2164-
user, group, project);
2159+
zfs_file_info_t zfi;
2160+
error = file_cbs[os->os_phys->os_type](dn->dn_bonustype, data, &zfi);
2161+
2162+
if (before) {
2163+
ASSERT(data);
2164+
dn->dn_olduid = zfi.zfi_user;
2165+
dn->dn_oldgid = zfi.zfi_group;
2166+
dn->dn_oldprojid = zfi.zfi_project;
2167+
} else if (data) {
2168+
dn->dn_newuid = zfi.zfi_user;
2169+
dn->dn_newgid = zfi.zfi_group;
2170+
dn->dn_newprojid = zfi.zfi_project;
2171+
}
21652172

21662173
/*
21672174
* Preserve existing uid/gid when the callback can't determine

0 commit comments

Comments
 (0)