Re: [RFC PATCH v3 6/8] fuse: implementation of lookup_handle+statx compound operation

From: Amir Goldstein

Date: Thu Feb 26 2026 - 05:10:10 EST


On Thu, Feb 26, 2026 at 10:54 AM Luis Henriques <luis@xxxxxxxxxx> wrote:
>
> Hi Amir,
>
> On Wed, Feb 25 2026, Amir Goldstein wrote:
>
> > On Wed, Feb 25, 2026 at 12:25 PM Luis Henriques <luis@xxxxxxxxxx> wrote:
> >>
> >> The implementation of lookup_handle+statx compound operation extends the
> >> lookup operation so that a file handle is be passed into the kernel. It
> >> also needs to include an extra inarg, so that the parent directory file
> >> handle can be sent to user-space. This extra inarg is added as an extension
> >> header to the request.
> >>
> >> By having a separate statx including in a compound operation allows the
> >> attr to be dropped from the lookup_handle request, simplifying the
> >> traditional FUSE lookup operation.
> >>
> >> Signed-off-by: Luis Henriques <luis@xxxxxxxxxx>
> >> ---
> >> fs/fuse/dir.c | 294 +++++++++++++++++++++++++++++++++++---
> >> fs/fuse/fuse_i.h | 23 ++-
> >> fs/fuse/inode.c | 48 +++++--
> >> fs/fuse/readdir.c | 2 +-
> >> include/uapi/linux/fuse.h | 23 ++-
> >> 5 files changed, 355 insertions(+), 35 deletions(-)
> >>
> >> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> >> index 5c0f1364c392..7fa8c405f1a3 100644
> >> --- a/fs/fuse/dir.c
> >> +++ b/fs/fuse/dir.c
> >> @@ -21,6 +21,7 @@
> >> #include <linux/security.h>
> >> #include <linux/types.h>
> >> #include <linux/kernel.h>
> >> +#include <linux/exportfs.h>
> >>
> >> static bool __read_mostly allow_sys_admin_access;
> >> module_param(allow_sys_admin_access, bool, 0644);
> >> @@ -372,6 +373,47 @@ static void fuse_lookup_init(struct fuse_args *args, u64 nodeid,
> >> args->out_args[0].value = outarg;
> >> }
> >>
> >> +static int do_lookup_handle_statx(struct fuse_mount *fm, u64 parent_nodeid,
> >> + struct inode *parent_inode,
> >> + const struct qstr *name,
> >> + struct fuse_entry2_out *lookup_out,
> >> + struct fuse_statx_out *statx_out,
> >> + struct fuse_file_handle **fh);
> >> +static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr);
> >> +static int do_reval_lookup(struct fuse_mount *fm, u64 parent_nodeid,
> >> + const struct qstr *name, u64 *nodeid,
> >> + u64 *generation, u64 *attr_valid,
> >> + struct fuse_attr *attr, struct fuse_file_handle **fh)
> >> +{
> >> + struct fuse_entry_out entry_out;
> >> + struct fuse_entry2_out lookup_out;
> >> + struct fuse_statx_out statx_out;
> >> + FUSE_ARGS(lookup_args);
> >> + int ret = 0;
> >> +
> >> + if (fm->fc->lookup_handle) {
> >> + ret = do_lookup_handle_statx(fm, parent_nodeid, NULL, name,
> >> + &lookup_out, &statx_out, fh);
> >> + if (!ret) {
> >> + *nodeid = lookup_out.nodeid;
> >> + *generation = lookup_out.generation;
> >> + *attr_valid = fuse_time_to_jiffies(lookup_out.entry_valid,
> >> + lookup_out.entry_valid_nsec);
> >> + fuse_statx_to_attr(&statx_out.stat, attr);
> >> + }
> >> + } else {
> >> + fuse_lookup_init(&lookup_args, parent_nodeid, name, &entry_out);
> >> + ret = fuse_simple_request(fm, &lookup_args);
> >> + if (!ret) {
> >> + *nodeid = entry_out.nodeid;
> >> + *generation = entry_out.generation;
> >> + *attr_valid = ATTR_TIMEOUT(&entry_out);
> >> + memcpy(attr, &entry_out.attr, sizeof(*attr));
> >> + }
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> /*
> >> * Check whether the dentry is still valid
> >> *
> >> @@ -399,10 +441,11 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
> >> goto invalid;
> >> else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
> >> (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
> >> - struct fuse_entry_out outarg;
> >> - FUSE_ARGS(args);
> >> struct fuse_forget_link *forget;
> >> + struct fuse_file_handle *fh = NULL;
> >> u64 attr_version;
> >> + u64 nodeid, generation, attr_valid;
> >> + struct fuse_attr attr;
> >>
> >> /* For negative dentries, always do a fresh lookup */
> >> if (!inode)
> >> @@ -421,35 +464,36 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
> >>
> >> attr_version = fuse_get_attr_version(fm->fc);
> >>
> >> - fuse_lookup_init(&args, get_node_id(dir), name, &outarg);
> >> - ret = fuse_simple_request(fm, &args);
> >> + ret = do_reval_lookup(fm, get_node_id(dir), name, &nodeid,
> >> + &generation, &attr_valid, &attr, &fh);
> >> /* Zero nodeid is same as -ENOENT */
> >> - if (!ret && !outarg.nodeid)
> >> + if (!ret && !nodeid)
> >> ret = -ENOENT;
> >> if (!ret) {
> >> fi = get_fuse_inode(inode);
> >> - if (outarg.nodeid != get_node_id(inode) ||
> >> - (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
> >> - fuse_queue_forget(fm->fc, forget,
> >> - outarg.nodeid, 1);
> >> + if (!fuse_file_handle_is_equal(fm->fc, fi->fh, fh) ||
> >> + nodeid != get_node_id(inode) ||
> >> + (bool) IS_AUTOMOUNT(inode) != (bool) (attr.flags & FUSE_ATTR_SUBMOUNT)) {
> >> + fuse_queue_forget(fm->fc, forget, nodeid, 1);
> >> + kfree(fh);
> >> goto invalid;
> >> }
> >> spin_lock(&fi->lock);
> >> fi->nlookup++;
> >> spin_unlock(&fi->lock);
> >> }
> >> + kfree(fh);
> >> kfree(forget);
> >> if (ret == -ENOMEM || ret == -EINTR)
> >> goto out;
> >> - if (ret || fuse_invalid_attr(&outarg.attr) ||
> >> - fuse_stale_inode(inode, outarg.generation, &outarg.attr))
> >> + if (ret || fuse_invalid_attr(&attr) ||
> >> + fuse_stale_inode(inode, generation, &attr))
> >> goto invalid;
> >>
> >> forget_all_cached_acls(inode);
> >> - fuse_change_attributes(inode, &outarg.attr, NULL,
> >> - ATTR_TIMEOUT(&outarg),
> >> + fuse_change_attributes(inode, &attr, NULL, attr_valid,
> >> attr_version);
> >> - fuse_change_entry_timeout(entry, &outarg);
> >> + fuse_dentry_settime(entry, attr_valid);
> >> } else if (inode) {
> >> fi = get_fuse_inode(inode);
> >> if (flags & LOOKUP_RCU) {
> >> @@ -546,8 +590,215 @@ bool fuse_invalid_attr(struct fuse_attr *attr)
> >> return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
> >> }
> >>
> >> -int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
> >> - u64 *time, struct inode **inode)
> >> +static int create_ext_handle(struct fuse_in_arg *ext, struct fuse_inode *fi)
> >> +{
> >> + struct fuse_ext_header *xh;
> >> + struct fuse_file_handle *fh;
> >> + u32 len;
> >> +
> >> + len = fuse_ext_size(sizeof(*fi->fh) + fi->fh->size);
> >> + xh = fuse_extend_arg(ext, len);
> >> + if (!xh)
> >> + return -ENOMEM;
> >> +
> >> + xh->size = len;
> >> + xh->type = FUSE_EXT_HANDLE;
> >> + fh = (struct fuse_file_handle *)&xh[1];
> >> + fh->size = fi->fh->size;
> >> + memcpy(fh->handle, fi->fh->handle, fh->size);
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static int fuse_lookup_handle_init(struct fuse_args *args, u64 nodeid,
> >> + struct fuse_inode *fi,
> >> + const struct qstr *name,
> >> + struct fuse_entry2_out *outarg)
> >> +{
> >> + struct fuse_file_handle *fh;
> >
> > Considering that fuse has long used uint64_t fh as the convention
> > for a file id all over the code, it would be better to pick a different
> > convention for fuse file handle, perhaps ffh, or fhandle?
>
> Good point, I'll make sure next revision will follow a different
> convention.
>
> >> + size_t fh_size = sizeof(*fh) + MAX_HANDLE_SZ;
> >
> > I don't remember what we concluded last time, but
> > shouldn't the server request max_handle_sz at init?
> > This constant is quite arbitrary.
>
> You're right, I should have pointed that out in the cover letter at least.
> In the previous version that maximum size was indeed provided by the
> server. But from the discussion here [0] I understood that this
> negotiation should be dropped. Here's what Miklos suggested:
>
> > How about allocating variable length arguments on demand? That would
> > allow getting rid of max_handle_size negotiation.
> >
> > args->out_var_alloc = true;
> > args->out_args[1].size = MAX_HANDLE_SZ;
> > args->out_args[1].value = NULL; /* Will be allocated to the actual size of the handle */
>
> Obviously that's not what the code is currently doing. The plan is to
> eventually set the .value to NULL and do the allocation elsewhere,
> according to the actual size returned.
>
> Because I didn't yet thought how/where the allocation could be done
> instead, this code is currently simplifying things, and that's why I
> picked this MAX_HANDLE_SZ.
>
> Sorry, I should have pointed that out at in a comment as well.
>
> [0] https://lore.kernel.org/all/CAJfpegszP+2XA=vADK4r09KU30BQd-r9sNu2Dog88yLG8iV7WQ@xxxxxxxxxxxxxx
>
> >> + int ret = -ENOMEM;
> >> +
> >> + fh = kzalloc(fh_size, GFP_KERNEL);
> >> + if (!fh)
> >> + return ret;
> >> +
> >> + memset(outarg, 0, sizeof(struct fuse_entry2_out));
> >> + args->opcode = FUSE_LOOKUP_HANDLE;
> >> + args->nodeid = nodeid;
> >> + args->in_numargs = 3;
> >> + fuse_set_zero_arg0(args);
> >> + args->in_args[1].size = name->len;
> >> + args->in_args[1].value = name->name;
> >> + args->in_args[2].size = 1;
> >> + args->in_args[2].value = "";
> >> + if (fi && fi->fh) {
> >
> > Same here fi->ffh? or fi->fhandle
>
> Ack!
>
> >> + args->is_ext = true;
> >> + args->ext_idx = args->in_numargs++;
> >> + args->in_args[args->ext_idx].size = 0;
> >> + ret = create_ext_handle(&args->in_args[args->ext_idx], fi);
> >> + if (ret) {
> >> + kfree(fh);
> >> + return ret;
> >> + }
> >> + }
> >> + args->out_numargs = 2;
> >> + args->out_argvar = true;
> >> + args->out_argvar_idx = 1;
> >> + args->out_args[0].size = sizeof(struct fuse_entry2_out);
> >> + args->out_args[0].value = outarg;
> >> +
> >> + /* XXX do allocation to the actual size of the handle */
> >> + args->out_args[1].size = fh_size;
> >> + args->out_args[1].value = fh;
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static void fuse_req_free_argvar_ext(struct fuse_args *args)
> >> +{
> >> + if (args->out_argvar)
> >> + kfree(args->out_args[args->out_argvar_idx].value);
> >> + if (args->is_ext)
> >> + kfree(args->in_args[args->ext_idx].value);
> >> +}
> >> +
> >
> > Just wanted to point out that statx_out is > 256 bytes on stack
> > so allocating 127+4 and the added complexity of ext arg
> > seem awkward.
> >
> > Unless we really want to support huge file handles (we don't?)
> > maybe the allocation can be restricted to fi->handle?
> > Not sure.
>
> If I understand you correctly, you're suggesting that the out_arg that
> will return the handle should be handled on the stack as well and then it
> would be copied to an allocated fi->handle. Sure, that can be done.
>
> On the other hand, as I mentioned above, the outarg allocation is just a
> simplification. So maybe the actual allocation of the handle may be done
> elsewhere with the _actual_ fh size, and then simply used in fh->handle.
>
> Please let me know if I got your comment right.
> (And thanks for the comments, by the way!)

file handle on stack only makes sense for small pre allocated size.
If the server has full control over handle size, then that is not relevant.

At some point we will need to address the fact that the most common
case is for very small file handles.

In struct fanotify_fid_event, we used a small inline buffer to optimize this
case. This could also be done for fuse_inode::handle, but we can worry about
that later.

Thanks,
Amir.