Skip to content

Commit d323fbf

Browse files
robnbehlendorf
authored andcommitted
FreeBSD: zfs_putpages: don't undirty pages until after write completes
In syncing mode, zfs_putpages() would put the entire range of pages onto the ZIL, then return VM_PAGER_OK for each page to the kernel. However, an associated zil_commit() or txg sync had not happened at this point, so the write may not actually be on disk. So, we rework that case to use a ZIL commit callback, and do the post-write work of undirtying the page and signaling completion there. We return VM_PAGER_PEND to the kernel instead so it knows that we will take care of it. The original version of this (238eab7) copied the Linux model and did the cleanup in a ZIL callback for both async and sync. This was a mistake, as FreeBSD does not have a separate "busy for writeback" flag like Linux which keeps the page usable. The full sbusy flag locks the entire page out until the itx callback fires, which for async is after txg sync, which could be literal seconds in the future. For the async case, the data is already on the DMU and the in-memory ZIL, which is sufficient for async writeback, so the old method of logging it without a callback, undirtying the page and returning is more than sufficient and reclaims that lost performance. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: Mark Johnston <[email protected]> Signed-off-by: Rob Norris <[email protected]> Closes #17533
1 parent ee2a2d9 commit d323fbf

File tree

3 files changed

+75
-14
lines changed

3 files changed

+75
-14
lines changed

include/os/freebsd/spl/sys/vm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
extern const int zfs_vm_pagerret_bad;
3636
extern const int zfs_vm_pagerret_error;
3737
extern const int zfs_vm_pagerret_ok;
38+
extern const int zfs_vm_pagerret_pend;
3839
extern const int zfs_vm_pagerput_sync;
3940
extern const int zfs_vm_pagerput_inval;
4041

module/os/freebsd/spl/spl_vm.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
const int zfs_vm_pagerret_bad = VM_PAGER_BAD;
4444
const int zfs_vm_pagerret_error = VM_PAGER_ERROR;
4545
const int zfs_vm_pagerret_ok = VM_PAGER_OK;
46+
const int zfs_vm_pagerret_pend = VM_PAGER_PEND;
4647
const int zfs_vm_pagerput_sync = VM_PAGER_PUT_SYNC;
4748
const int zfs_vm_pagerput_inval = VM_PAGER_PUT_INVAL;
4849

module/os/freebsd/zfs/zfs_vnops_os.c

Lines changed: 73 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
2626
* Copyright (c) 2014 Integros [integros.com]
2727
* Copyright 2017 Nexenta Systems, Inc.
28+
* Copyright (c) 2025, Klara, Inc.
2829
*/
2930

3031
/* Portions Copyright 2007 Jeremy Teo */
@@ -4299,6 +4300,33 @@ zfs_freebsd_getpages(struct vop_getpages_args *ap)
42994300
ap->a_rahead));
43004301
}
43014302

4303+
typedef struct {
4304+
uint_t pca_npages;
4305+
vm_page_t pca_pages[];
4306+
} putpage_commit_arg_t;
4307+
4308+
static void
4309+
zfs_putpage_commit_cb(void *arg)
4310+
{
4311+
putpage_commit_arg_t *pca = arg;
4312+
vm_object_t object = pca->pca_pages[0]->object;
4313+
4314+
zfs_vmobject_wlock(object);
4315+
4316+
for (uint_t i = 0; i < pca->pca_npages; i++) {
4317+
vm_page_t pp = pca->pca_pages[i];
4318+
vm_page_undirty(pp);
4319+
vm_page_sunbusy(pp);
4320+
}
4321+
4322+
vm_object_pip_wakeupn(object, pca->pca_npages);
4323+
4324+
zfs_vmobject_wunlock(object);
4325+
4326+
kmem_free(pca,
4327+
offsetof(putpage_commit_arg_t, pca_pages[pca->pca_npages]));
4328+
}
4329+
43024330
static int
43034331
zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
43044332
int *rtvals)
@@ -4400,10 +4428,12 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
44004428
}
44014429

44024430
if (zp->z_blksz < PAGE_SIZE) {
4403-
for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4404-
tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4431+
vm_ooffset_t woff = off;
4432+
size_t wlen = len;
4433+
for (i = 0; wlen > 0; woff += tocopy, wlen -= tocopy, i++) {
4434+
tocopy = MIN(PAGE_SIZE, wlen);
44054435
va = zfs_map_page(ma[i], &sf);
4406-
dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4436+
dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va, tx);
44074437
zfs_unmap_page(sf);
44084438
}
44094439
} else {
@@ -4424,19 +4454,48 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
44244454
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
44254455
err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
44264456
ASSERT0(err);
4427-
/*
4428-
* XXX we should be passing a callback to undirty
4429-
* but that would make the locking messier
4430-
*/
4431-
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4432-
len, commit, B_FALSE, NULL, NULL);
44334457

4434-
zfs_vmobject_wlock(object);
4435-
for (i = 0; i < ncount; i++) {
4436-
rtvals[i] = zfs_vm_pagerret_ok;
4437-
vm_page_undirty(ma[i]);
4458+
if (commit) {
4459+
/*
4460+
* Caller requested that we commit immediately. We set
4461+
* a callback on the log entry, to be called once its
4462+
* on disk after the call to zil_commit() below. The
4463+
* pages will be undirtied and unbusied there.
4464+
*/
4465+
putpage_commit_arg_t *pca = kmem_alloc(
4466+
offsetof(putpage_commit_arg_t, pca_pages[ncount]),
4467+
KM_SLEEP);
4468+
pca->pca_npages = ncount;
4469+
memcpy(pca->pca_pages, ma, sizeof (vm_page_t) * ncount);
4470+
4471+
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len,
4472+
B_TRUE, B_FALSE, zfs_putpage_commit_cb, pca);
4473+
4474+
for (i = 0; i < ncount; i++)
4475+
rtvals[i] = zfs_vm_pagerret_pend;
4476+
} else {
4477+
/*
4478+
* Caller just wants the page written back somewhere,
4479+
* but doesn't need it committed yet. We've already
4480+
* written it back to the DMU, so we just need to put
4481+
* it on the async log, then undirty the page and
4482+
* return.
4483+
*
4484+
* We cannot use a callback here, because it would keep
4485+
* the page busy (locked) until it is eventually
4486+
* written down at txg sync.
4487+
*/
4488+
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len,
4489+
B_FALSE, B_FALSE, NULL, NULL);
4490+
4491+
zfs_vmobject_wlock(object);
4492+
for (i = 0; i < ncount; i++) {
4493+
rtvals[i] = zfs_vm_pagerret_ok;
4494+
vm_page_undirty(ma[i]);
4495+
}
4496+
zfs_vmobject_wunlock(object);
44384497
}
4439-
zfs_vmobject_wunlock(object);
4498+
44404499
VM_CNT_INC(v_vnodeout);
44414500
VM_CNT_ADD(v_vnodepgsout, ncount);
44424501
}

0 commit comments

Comments
 (0)