Re: [PATCH] fuse: enable larger read buffers for readdir.

From: Bernd Schubert
Date: Wed Jul 26 2023 - 09:56:17 EST




On 7/26/23 12:59, Jaco Kroon wrote:
Signed-off-by: Jaco Kroon <jaco@xxxxxxxxx>
---
fs/fuse/Kconfig | 16 ++++++++++++++++
fs/fuse/readdir.c | 42 ++++++++++++++++++++++++------------------
2 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 038ed0b9aaa5..0783f9ee5cd3 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -18,6 +18,22 @@ config FUSE_FS
If you want to develop a userspace FS, or if you want to use
a filesystem based on FUSE, answer Y or M.
+config FUSE_READDIR_ORDER
+ int
+ range 0 5
+ default 5
+ help
+ readdir performance varies greatly depending on the size of the read.
+ Larger buffers results in larger reads, thus fewer reads and higher
+ performance in return.
+
+ You may want to reduce this value on seriously constrained memory
+ systems where 128KiB (assuming 4KiB pages) cache pages is not ideal.
+
+ This value reprents the order of the number of pages to allocate (ie,
+ the shift value). A value of 0 is thus 1 page (4KiB) where 5 is 32
+ pages (128KiB).
+

I like the idea of a larger readdir size, but shouldn't that be a server/daemon/library decision which size to use, instead of kernel compile time? So should be part of FUSE_INIT negotiation?

config CUSE
tristate "Character device in Userspace support"
depends on FUSE_FS
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index dc603479b30e..98c62b623240 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -13,6 +13,12 @@
#include <linux/pagemap.h>
#include <linux/highmem.h>
+#define READDIR_PAGES_ORDER CONFIG_FUSE_READDIR_ORDER
+#define READDIR_PAGES (1 << READDIR_PAGES_ORDER)
+#define READDIR_PAGES_SIZE (PAGE_SIZE << READDIR_PAGES_ORDER)
+#define READDIR_PAGES_MASK (READDIR_PAGES_SIZE - 1)
+#define READDIR_PAGES_SHIFT (PAGE_SHIFT + READDIR_PAGES_ORDER)
+
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
struct fuse_conn *fc = get_fuse_conn(dir);
@@ -52,10 +58,10 @@ static void fuse_add_dirent_to_cache(struct file *file,
}
version = fi->rdc.version;
size = fi->rdc.size;
- offset = size & ~PAGE_MASK;
- index = size >> PAGE_SHIFT;
+ offset = size & ~READDIR_PAGES_MASK;
+ index = size >> READDIR_PAGES_SHIFT;
/* Dirent doesn't fit in current page? Jump to next page. */
- if (offset + reclen > PAGE_SIZE) {
+ if (offset + reclen > READDIR_PAGES_SIZE) {
index++;
offset = 0;
}
@@ -83,7 +89,7 @@ static void fuse_add_dirent_to_cache(struct file *file,
}
memcpy(addr + offset, dirent, reclen);
kunmap_local(addr);
- fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
+ fi->rdc.size = (index << READDIR_PAGES_SHIFT) + offset + reclen;
fi->rdc.pos = dirent->off;
unlock:
spin_unlock(&fi->rdc.lock);
@@ -104,7 +110,7 @@ static void fuse_readdir_cache_end(struct file *file, loff_t pos)
}
fi->rdc.cached = true;
- end = ALIGN(fi->rdc.size, PAGE_SIZE);
+ end = ALIGN(fi->rdc.size, READDIR_PAGES_SIZE);
spin_unlock(&fi->rdc.lock);
/* truncate unused tail of cache */
@@ -328,25 +334,25 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
- struct fuse_page_desc desc = { .length = PAGE_SIZE };
+ struct fuse_page_desc desc = { .length = READDIR_PAGES_SIZE };
u64 attr_version = 0;
bool locked;
- page = alloc_page(GFP_KERNEL);
+ page = alloc_pages(GFP_KERNEL, READDIR_PAGES_ORDER);

I guess that should become folio alloc(), one way or the other. Now I think order 0 was chosen before to avoid risk of allocation failure. I guess it might work to try a large size and to fall back to 0 when that failed. Or fail back to the slower vmalloc.

if (!page)
return -ENOMEM;
plus = fuse_use_readdirplus(inode, ctx);
ap->args.out_pages = true;
- ap->num_pages = 1;
+ ap->num_pages = READDIR_PAGES;
ap->pages = &page;
ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fm->fc);
- fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
+ fuse_read_args_fill(&ia, file, ctx->pos, READDIR_PAGES_SIZE,
FUSE_READDIRPLUS);
} else {
- fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
+ fuse_read_args_fill(&ia, file, ctx->pos, READDIR_PAGES_SIZE,
FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
@@ -383,7 +389,7 @@ static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
void *addr, unsigned int size,
struct dir_context *ctx)
{
- unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
+ unsigned int offset = ff->readdir.cache_off & ~READDIR_PAGES_MASK;
enum fuse_parse_result res = FOUND_NONE;
WARN_ON(offset >= size);
@@ -504,16 +510,16 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
WARN_ON(fi->rdc.size < ff->readdir.cache_off);
- index = ff->readdir.cache_off >> PAGE_SHIFT;
+ index = ff->readdir.cache_off >> READDIR_PAGES_SHIFT;
- if (index == (fi->rdc.size >> PAGE_SHIFT))
- size = fi->rdc.size & ~PAGE_MASK;
+ if (index == (fi->rdc.size >> READDIR_PAGES_SHIFT))
+ size = fi->rdc.size & ~READDIR_PAGES_MASK;
else
- size = PAGE_SIZE;
+ size = READDIR_PAGES_SIZE;
spin_unlock(&fi->rdc.lock);
/* EOF? */
- if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
+ if ((ff->readdir.cache_off & ~READDIR_PAGES_MASK) == size)
return 0;
page = find_get_page_flags(file->f_mapping, index,
@@ -559,9 +565,9 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
if (res == FOUND_ALL)
return 0;
- if (size == PAGE_SIZE) {
+ if (size == READDIR_PAGES_SIZE) {
/* We hit end of page: skip to next page. */
- ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
+ ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, READDIR_PAGES_SIZE);
goto retry;
}

Thanks,
Bernd