Skip to content

Commit

Permalink
Avoid possible copy_to_user pagefaults while cluster locked.
Browse files Browse the repository at this point in the history
dir_emit() will copy_to_user, which can pagefault. If this happens while
cluster locked, we could deadlock.

We use a single page to stage dir_emit data, and iterate between
fetching dirents while locked, and emitting them while not locked.

Signed-off-by: Auke Kok <[email protected]>
  • Loading branch information
aversecat committed Jul 23, 2024
1 parent 4b17483 commit a5bd044
Showing 1 changed file with 69 additions and 21 deletions.
90 changes: 69 additions & 21 deletions kmod/src/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <linux/uio.h>
#include <linux/xattr.h>
#include <linux/namei.h>
#include <linux/mm.h>

#include "format.h"
#include "file.h"
Expand Down Expand Up @@ -449,67 +450,114 @@ static int scoutfs_readdir(struct file *file, struct dir_context *ctx)
struct scoutfs_dirent *dent = NULL;
struct scoutfs_key last_key;
struct scoutfs_key key;
int name_len;
u64 pos;
struct page *page = NULL;
void *data = NULL;
int *name_len;
u64 page_pos;
int ret;
int complete = 0;
int i, j;

if (!dir_emit_dots(file, ctx))
return 0;

dent = alloc_dirent(SCOUTFS_NAME_LEN);
if (!dent) {
page = alloc_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
}
data = page_address(page);

init_dirent_key(&last_key, SCOUTFS_READDIR_TYPE, scoutfs_ino(inode),
SCOUTFS_DIRENT_LAST_POS, 0);

/*
* lock and fetch dirent items, until the page no longer fits
* a max size dirent (288b). Then unlock and dir_emit the ones
* we stored in the page.
*
* We store the name_len first (4b) and then the dirent. page_pos
* is maintained to keep progress. Once done fetching keys, we mark
* complete. The second iteration will stop once it reachs the count
* from the first iteration.
*
* If the dirents are ~20b size, we should get about 50-100 in a
* single page.
*/
cont:
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &dir_lock);
if (ret)
goto out;
goto out_page;

for (;;) {
page_pos = 0;

for (i = 0; page_pos + sizeof(int) + dirent_bytes(SCOUTFS_NAME_LEN) < PAGE_SIZE; i++) {
init_dirent_key(&key, SCOUTFS_READDIR_TYPE, scoutfs_ino(inode),
ctx->pos, 0);

dent = (struct scoutfs_dirent *)(data + page_pos + sizeof(int));
name_len = (int *)(data + page_pos);

ret = scoutfs_item_next(sb, &key, &last_key, dent,
dirent_bytes(SCOUTFS_NAME_LEN),
dir_lock);
if (ret < 0) {
if (ret == -ENOENT)
if (ret == -ENOENT) {
ret = 0;
break;
complete = 1;
break;
}
goto out_unlock;
}

name_len = ret - sizeof(struct scoutfs_dirent);
if (name_len < 1 || name_len > SCOUTFS_NAME_LEN) {
*name_len = ret - sizeof(struct scoutfs_dirent);
if (*name_len < 1 || *name_len > SCOUTFS_NAME_LEN) {
scoutfs_corruption(sb, SC_DIRENT_READDIR_NAME_LEN,
corrupt_dirent_readdir_name_len,
"dir_ino %llu pos %llu key "SK_FMT" len %d",
scoutfs_ino(inode),
ctx->pos,
SK_ARG(&key), name_len);
SK_ARG(&key), *name_len);
ret = -EIO;
goto out;
goto out_unlock;
}

pos = le64_to_cpu(key.skd_major);
ctx->pos = pos;
ctx->pos = le64_to_cpu(dent->pos) + 1;

if (!dir_emit(ctx, dent->name, name_len,
page_pos += dirent_bytes(*name_len) + sizeof(int);
}

scoutfs_unlock(sb, dir_lock, SCOUTFS_LOCK_READ);

page_pos = 0;

for (j = 0; j < i; j++) {
dent = (struct scoutfs_dirent *)(data + page_pos + sizeof(int));
name_len = (int *)(data + page_pos);

ctx->pos = le64_to_cpu(dent->pos);
if (!dir_emit(ctx, dent->name, *name_len,
le64_to_cpu(dent->ino),
dentry_type(dent->type))) {
ret = 0;
break;
goto out_page;
}

ctx->pos = pos + 1;
ctx->pos = le64_to_cpu(dent->pos) + 1;

page_pos += dirent_bytes(*name_len) + sizeof(int);
}

out:
scoutfs_unlock(sb, dir_lock, SCOUTFS_LOCK_READ);
/* repeat */
if (complete == 0)
goto cont;
ret = 0;

kfree(dent);
goto out_page;

out_unlock:
scoutfs_unlock(sb, dir_lock, SCOUTFS_LOCK_READ);
out_page:
if (page)
__free_page(page);
return ret;
}

Expand Down

0 comments on commit a5bd044

Please sign in to comment.