Re: [RFC PATCH v3 6/8] fuse: implementation of lookup_handle+statx compound operation

From: Luis Henriques

Date: Thu Feb 26 2026 - 05:34:40 EST


On Thu, Feb 26 2026, Amir Goldstein wrote:

> On Thu, Feb 26, 2026 at 10:54 AM Luis Henriques <luis@xxxxxxxxxx> wrote:
>>
>> Hi Amir,
>>
>> On Wed, Feb 25 2026, Amir Goldstein wrote:
>>
>> > On Wed, Feb 25, 2026 at 12:25 PM Luis Henriques <luis@xxxxxxxxxx> wrote:
>> >>
>> >> The implementation of lookup_handle+statx compound operation extends the
>> >> lookup operation so that a file handle is be passed into the kernel. It
>> >> also needs to include an extra inarg, so that the parent directory file
>> >> handle can be sent to user-space. This extra inarg is added as an extension
>> >> header to the request.
>> >>
>> >> By having a separate statx including in a compound operation allows the
>> >> attr to be dropped from the lookup_handle request, simplifying the
>> >> traditional FUSE lookup operation.
>> >>
>> >> Signed-off-by: Luis Henriques <luis@xxxxxxxxxx>
>> >> ---
>> >> fs/fuse/dir.c | 294 +++++++++++++++++++++++++++++++++++---
>> >> fs/fuse/fuse_i.h | 23 ++-
>> >> fs/fuse/inode.c | 48 +++++--
>> >> fs/fuse/readdir.c | 2 +-
>> >> include/uapi/linux/fuse.h | 23 ++-
>> >> 5 files changed, 355 insertions(+), 35 deletions(-)
>> >>
>> >> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
>> >> index 5c0f1364c392..7fa8c405f1a3 100644
>> >> --- a/fs/fuse/dir.c
>> >> +++ b/fs/fuse/dir.c
>> >> @@ -21,6 +21,7 @@
>> >> #include <linux/security.h>
>> >> #include <linux/types.h>
>> >> #include <linux/kernel.h>
>> >> +#include <linux/exportfs.h>
>> >>
>> >> static bool __read_mostly allow_sys_admin_access;
>> >> module_param(allow_sys_admin_access, bool, 0644);
>> >> @@ -372,6 +373,47 @@ static void fuse_lookup_init(struct fuse_args *args, u64 nodeid,
>> >> args->out_args[0].value = outarg;
>> >> }
>> >>
>> >> +static int do_lookup_handle_statx(struct fuse_mount *fm, u64 parent_nodeid,
>> >> + struct inode *parent_inode,
>> >> + const struct qstr *name,
>> >> + struct fuse_entry2_out *lookup_out,
>> >> + struct fuse_statx_out *statx_out,
>> >> + struct fuse_file_handle **fh);
>> >> +static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr);
>> >> +static int do_reval_lookup(struct fuse_mount *fm, u64 parent_nodeid,
>> >> + const struct qstr *name, u64 *nodeid,
>> >> + u64 *generation, u64 *attr_valid,
>> >> + struct fuse_attr *attr, struct fuse_file_handle **fh)
>> >> +{
>> >> + struct fuse_entry_out entry_out;
>> >> + struct fuse_entry2_out lookup_out;
>> >> + struct fuse_statx_out statx_out;
>> >> + FUSE_ARGS(lookup_args);
>> >> + int ret = 0;
>> >> +
>> >> + if (fm->fc->lookup_handle) {
>> >> + ret = do_lookup_handle_statx(fm, parent_nodeid, NULL, name,
>> >> + &lookup_out, &statx_out, fh);
>> >> + if (!ret) {
>> >> + *nodeid = lookup_out.nodeid;
>> >> + *generation = lookup_out.generation;
>> >> + *attr_valid = fuse_time_to_jiffies(lookup_out.entry_valid,
>> >> + lookup_out.entry_valid_nsec);
>> >> + fuse_statx_to_attr(&statx_out.stat, attr);
>> >> + }
>> >> + } else {
>> >> + fuse_lookup_init(&lookup_args, parent_nodeid, name, &entry_out);
>> >> + ret = fuse_simple_request(fm, &lookup_args);
>> >> + if (!ret) {
>> >> + *nodeid = entry_out.nodeid;
>> >> + *generation = entry_out.generation;
>> >> + *attr_valid = ATTR_TIMEOUT(&entry_out);
>> >> + memcpy(attr, &entry_out.attr, sizeof(*attr));
>> >> + }
>> >> + }
>> >> +
>> >> + return ret;
>> >> +}
>> >> /*
>> >> * Check whether the dentry is still valid
>> >> *
>> >> @@ -399,10 +441,11 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
>> >> goto invalid;
>> >> else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
>> >> (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
>> >> - struct fuse_entry_out outarg;
>> >> - FUSE_ARGS(args);
>> >> struct fuse_forget_link *forget;
>> >> + struct fuse_file_handle *fh = NULL;
>> >> u64 attr_version;
>> >> + u64 nodeid, generation, attr_valid;
>> >> + struct fuse_attr attr;
>> >>
>> >> /* For negative dentries, always do a fresh lookup */
>> >> if (!inode)
>> >> @@ -421,35 +464,36 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
>> >>
>> >> attr_version = fuse_get_attr_version(fm->fc);
>> >>
>> >> - fuse_lookup_init(&args, get_node_id(dir), name, &outarg);
>> >> - ret = fuse_simple_request(fm, &args);
>> >> + ret = do_reval_lookup(fm, get_node_id(dir), name, &nodeid,
>> >> + &generation, &attr_valid, &attr, &fh);
>> >> /* Zero nodeid is same as -ENOENT */
>> >> - if (!ret && !outarg.nodeid)
>> >> + if (!ret && !nodeid)
>> >> ret = -ENOENT;
>> >> if (!ret) {
>> >> fi = get_fuse_inode(inode);
>> >> - if (outarg.nodeid != get_node_id(inode) ||
>> >> - (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
>> >> - fuse_queue_forget(fm->fc, forget,
>> >> - outarg.nodeid, 1);
>> >> + if (!fuse_file_handle_is_equal(fm->fc, fi->fh, fh) ||
>> >> + nodeid != get_node_id(inode) ||
>> >> + (bool) IS_AUTOMOUNT(inode) != (bool) (attr.flags & FUSE_ATTR_SUBMOUNT)) {
>> >> + fuse_queue_forget(fm->fc, forget, nodeid, 1);
>> >> + kfree(fh);
>> >> goto invalid;
>> >> }
>> >> spin_lock(&fi->lock);
>> >> fi->nlookup++;
>> >> spin_unlock(&fi->lock);
>> >> }
>> >> + kfree(fh);
>> >> kfree(forget);
>> >> if (ret == -ENOMEM || ret == -EINTR)
>> >> goto out;
>> >> - if (ret || fuse_invalid_attr(&outarg.attr) ||
>> >> - fuse_stale_inode(inode, outarg.generation, &outarg.attr))
>> >> + if (ret || fuse_invalid_attr(&attr) ||
>> >> + fuse_stale_inode(inode, generation, &attr))
>> >> goto invalid;
>> >>
>> >> forget_all_cached_acls(inode);
>> >> - fuse_change_attributes(inode, &outarg.attr, NULL,
>> >> - ATTR_TIMEOUT(&outarg),
>> >> + fuse_change_attributes(inode, &attr, NULL, attr_valid,
>> >> attr_version);
>> >> - fuse_change_entry_timeout(entry, &outarg);
>> >> + fuse_dentry_settime(entry, attr_valid);
>> >> } else if (inode) {
>> >> fi = get_fuse_inode(inode);
>> >> if (flags & LOOKUP_RCU) {
>> >> @@ -546,8 +590,215 @@ bool fuse_invalid_attr(struct fuse_attr *attr)
>> >> return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
>> >> }
>> >>
>> >> -int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
>> >> - u64 *time, struct inode **inode)
>> >> +static int create_ext_handle(struct fuse_in_arg *ext, struct fuse_inode *fi)
>> >> +{
>> >> + struct fuse_ext_header *xh;
>> >> + struct fuse_file_handle *fh;
>> >> + u32 len;
>> >> +
>> >> + len = fuse_ext_size(sizeof(*fi->fh) + fi->fh->size);
>> >> + xh = fuse_extend_arg(ext, len);
>> >> + if (!xh)
>> >> + return -ENOMEM;
>> >> +
>> >> + xh->size = len;
>> >> + xh->type = FUSE_EXT_HANDLE;
>> >> + fh = (struct fuse_file_handle *)&xh[1];
>> >> + fh->size = fi->fh->size;
>> >> + memcpy(fh->handle, fi->fh->handle, fh->size);
>> >> +
>> >> + return 0;
>> >> +}
>> >> +
>> >> +static int fuse_lookup_handle_init(struct fuse_args *args, u64 nodeid,
>> >> + struct fuse_inode *fi,
>> >> + const struct qstr *name,
>> >> + struct fuse_entry2_out *outarg)
>> >> +{
>> >> + struct fuse_file_handle *fh;
>> >
>> > Considering that fuse has long used uint64_t fh as the convention
>> > for a file id all over the code, it would be better to pick a different
>> > convention for fuse file handle, perhaps ffh, or fhandle?
>>
>> Good point, I'll make sure next revision will follow a different
>> convention.
>>
>> >> + size_t fh_size = sizeof(*fh) + MAX_HANDLE_SZ;
>> >
>> > I don't remember what we concluded last time, but
>> > shouldn't the server request max_handle_sz at init?
>> > This constant is quite arbitrary.
>>
>> You're right, I should have pointed that out in the cover letter at least.
>> In the previous version that maximum size was indeed provided by the
>> server. But from the discussion here [0] I understood that this
>> negotiation should be dropped. Here's what Miklos suggested:
>>
>> > How about allocating variable length arguments on demand? That would
>> > allow getting rid of max_handle_size negotiation.
>> >
>> > args->out_var_alloc = true;
>> > args->out_args[1].size = MAX_HANDLE_SZ;
>> > args->out_args[1].value = NULL; /* Will be allocated to the actual size of the handle */
>>
>> Obviously that's not what the code is currently doing. The plan is to
>> eventually set the .value to NULL and do the allocation elsewhere,
>> according to the actual size returned.
>>
>> Because I didn't yet thought how/where the allocation could be done
>> instead, this code is currently simplifying things, and that's why I
>> picked this MAX_HANDLE_SZ.
>>
>> Sorry, I should have pointed that out at in a comment as well.
>>
>> [0] https://lore.kernel.org/all/CAJfpegszP+2XA=vADK4r09KU30BQd-r9sNu2Dog88yLG8iV7WQ@xxxxxxxxxxxxxx
>>
>> >> + int ret = -ENOMEM;
>> >> +
>> >> + fh = kzalloc(fh_size, GFP_KERNEL);
>> >> + if (!fh)
>> >> + return ret;
>> >> +
>> >> + memset(outarg, 0, sizeof(struct fuse_entry2_out));
>> >> + args->opcode = FUSE_LOOKUP_HANDLE;
>> >> + args->nodeid = nodeid;
>> >> + args->in_numargs = 3;
>> >> + fuse_set_zero_arg0(args);
>> >> + args->in_args[1].size = name->len;
>> >> + args->in_args[1].value = name->name;
>> >> + args->in_args[2].size = 1;
>> >> + args->in_args[2].value = "";
>> >> + if (fi && fi->fh) {
>> >
>> > Same here fi->ffh? or fi->fhandle
>>
>> Ack!
>>
>> >> + args->is_ext = true;
>> >> + args->ext_idx = args->in_numargs++;
>> >> + args->in_args[args->ext_idx].size = 0;
>> >> + ret = create_ext_handle(&args->in_args[args->ext_idx], fi);
>> >> + if (ret) {
>> >> + kfree(fh);
>> >> + return ret;
>> >> + }
>> >> + }
>> >> + args->out_numargs = 2;
>> >> + args->out_argvar = true;
>> >> + args->out_argvar_idx = 1;
>> >> + args->out_args[0].size = sizeof(struct fuse_entry2_out);
>> >> + args->out_args[0].value = outarg;
>> >> +
>> >> + /* XXX do allocation to the actual size of the handle */
>> >> + args->out_args[1].size = fh_size;
>> >> + args->out_args[1].value = fh;
>> >> +
>> >> + return 0;
>> >> +}
>> >> +
>> >> +static void fuse_req_free_argvar_ext(struct fuse_args *args)
>> >> +{
>> >> + if (args->out_argvar)
>> >> + kfree(args->out_args[args->out_argvar_idx].value);
>> >> + if (args->is_ext)
>> >> + kfree(args->in_args[args->ext_idx].value);
>> >> +}
>> >> +
>> >
>> > Just wanted to point out that statx_out is > 256 bytes on stack
>> > so allocating 127+4 and the added complexity of ext arg
>> > seem awkward.
>> >
>> > Unless we really want to support huge file handles (we don't?)
>> > maybe the allocation can be restricted to fi->handle?
>> > Not sure.
>>
>> If I understand you correctly, you're suggesting that the out_arg that
>> will return the handle should be handled on the stack as well and then it
>> would be copied to an allocated fi->handle. Sure, that can be done.
>>
>> On the other hand, as I mentioned above, the outarg allocation is just a
>> simplification. So maybe the actual allocation of the handle may be done
>> elsewhere with the _actual_ fh size, and then simply used in fh->handle.
>>
>> Please let me know if I got your comment right.
>> (And thanks for the comments, by the way!)
>
> file handle on stack only makes sense for small pre allocated size.
> If the server has full control over handle size, then that is not relevant.
>
> At some point we will need to address the fact that the most common
> case is for very small file handles.
>
> In struct fanotify_fid_event, we used a small inline buffer to optimize this
> case. This could also be done for fuse_inode::handle, but we can worry about
> that later.

Thanks, I had took a look into it before -- I think you had pointed it to
me!. But I agree that this is something that can be handled once I have
most of the other things sorted out.

Cheers,
--
Luís