From: Luis Henriques <luis@igalia.com>
To: Amir Goldstein <amir73il@gmail.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>,
Bernd Schubert <bschubert@ddn.com>,
Bernd Schubert <bernd@bsbernd.com>,
"Darrick J. Wong" <djwong@kernel.org>,
Horst Birthelmer <hbirthelmer@ddn.com>,
Joanne Koong <joannelkoong@gmail.com>,
Kevin Chen <kchen@ddn.com>,
linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
Matt Harvey <mharvey@jumptrading.com>,
kernel-dev@igalia.com
Subject: Re: [RFC PATCH v3 6/8] fuse: implementation of lookup_handle+statx compound operation
Date: Thu, 26 Feb 2026 10:33:01 +0000 [thread overview]
Message-ID: <87v7fj7q1u.fsf@wotan.olymp> (raw)
In-Reply-To: <CAOQ4uxj-uVBvLQZxpsfNC+AR8+kFGUDEV6tOzH76AC0KU_g7Hg@mail.gmail.com> (Amir Goldstein's message of "Thu, 26 Feb 2026 11:08:02 +0100")
On Thu, Feb 26 2026, Amir Goldstein wrote:
> On Thu, Feb 26, 2026 at 10:54 AM Luis Henriques <luis@igalia.com> wrote:
>>
>> Hi Amir,
>>
>> On Wed, Feb 25 2026, Amir Goldstein wrote:
>>
>> > On Wed, Feb 25, 2026 at 12:25 PM Luis Henriques <luis@igalia.com> wrote:
>> >>
>> >> The implementation of lookup_handle+statx compound operation extends the
>> >> lookup operation so that a file handle is passed into the kernel. It
>> >> also needs to include an extra inarg, so that the parent directory file
>> >> handle can be sent to user-space. This extra inarg is added as an extension
>> >> header to the request.
>> >>
>> >> Having a separate statx included in a compound operation allows the
>> >> attr to be dropped from the lookup_handle request, simplifying the
>> >> traditional FUSE lookup operation.
>> >>
>> >> Signed-off-by: Luis Henriques <luis@igalia.com>
>> >> ---
>> >> fs/fuse/dir.c | 294 +++++++++++++++++++++++++++++++++++---
>> >> fs/fuse/fuse_i.h | 23 ++-
>> >> fs/fuse/inode.c | 48 +++++--
>> >> fs/fuse/readdir.c | 2 +-
>> >> include/uapi/linux/fuse.h | 23 ++-
>> >> 5 files changed, 355 insertions(+), 35 deletions(-)
>> >>
>> >> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
>> >> index 5c0f1364c392..7fa8c405f1a3 100644
>> >> --- a/fs/fuse/dir.c
>> >> +++ b/fs/fuse/dir.c
>> >> @@ -21,6 +21,7 @@
>> >> #include <linux/security.h>
>> >> #include <linux/types.h>
>> >> #include <linux/kernel.h>
>> >> +#include <linux/exportfs.h>
>> >>
>> >> static bool __read_mostly allow_sys_admin_access;
>> >> module_param(allow_sys_admin_access, bool, 0644);
>> >> @@ -372,6 +373,47 @@ static void fuse_lookup_init(struct fuse_args *args, u64 nodeid,
>> >> args->out_args[0].value = outarg;
>> >> }
>> >>
>> >> +static int do_lookup_handle_statx(struct fuse_mount *fm, u64 parent_nodeid,
>> >> + struct inode *parent_inode,
>> >> + const struct qstr *name,
>> >> + struct fuse_entry2_out *lookup_out,
>> >> + struct fuse_statx_out *statx_out,
>> >> + struct fuse_file_handle **fh);
>> >> +static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr);
>> >> +static int do_reval_lookup(struct fuse_mount *fm, u64 parent_nodeid,
>> >> + const struct qstr *name, u64 *nodeid,
>> >> + u64 *generation, u64 *attr_valid,
>> >> + struct fuse_attr *attr, struct fuse_file_handle **fh)
>> >> +{
>> >> + struct fuse_entry_out entry_out;
>> >> + struct fuse_entry2_out lookup_out;
>> >> + struct fuse_statx_out statx_out;
>> >> + FUSE_ARGS(lookup_args);
>> >> + int ret = 0;
>> >> +
>> >> + if (fm->fc->lookup_handle) {
>> >> + ret = do_lookup_handle_statx(fm, parent_nodeid, NULL, name,
>> >> + &lookup_out, &statx_out, fh);
>> >> + if (!ret) {
>> >> + *nodeid = lookup_out.nodeid;
>> >> + *generation = lookup_out.generation;
>> >> + *attr_valid = fuse_time_to_jiffies(lookup_out.entry_valid,
>> >> + lookup_out.entry_valid_nsec);
>> >> + fuse_statx_to_attr(&statx_out.stat, attr);
>> >> + }
>> >> + } else {
>> >> + fuse_lookup_init(&lookup_args, parent_nodeid, name, &entry_out);
>> >> + ret = fuse_simple_request(fm, &lookup_args);
>> >> + if (!ret) {
>> >> + *nodeid = entry_out.nodeid;
>> >> + *generation = entry_out.generation;
>> >> + *attr_valid = ATTR_TIMEOUT(&entry_out);
>> >> + memcpy(attr, &entry_out.attr, sizeof(*attr));
>> >> + }
>> >> + }
>> >> +
>> >> + return ret;
>> >> +}
>> >> /*
>> >> * Check whether the dentry is still valid
>> >> *
>> >> @@ -399,10 +441,11 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
>> >> goto invalid;
>> >> else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
>> >> (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
>> >> - struct fuse_entry_out outarg;
>> >> - FUSE_ARGS(args);
>> >> struct fuse_forget_link *forget;
>> >> + struct fuse_file_handle *fh = NULL;
>> >> u64 attr_version;
>> >> + u64 nodeid, generation, attr_valid;
>> >> + struct fuse_attr attr;
>> >>
>> >> /* For negative dentries, always do a fresh lookup */
>> >> if (!inode)
>> >> @@ -421,35 +464,36 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
>> >>
>> >> attr_version = fuse_get_attr_version(fm->fc);
>> >>
>> >> - fuse_lookup_init(&args, get_node_id(dir), name, &outarg);
>> >> - ret = fuse_simple_request(fm, &args);
>> >> + ret = do_reval_lookup(fm, get_node_id(dir), name, &nodeid,
>> >> + &generation, &attr_valid, &attr, &fh);
>> >> /* Zero nodeid is same as -ENOENT */
>> >> - if (!ret && !outarg.nodeid)
>> >> + if (!ret && !nodeid)
>> >> ret = -ENOENT;
>> >> if (!ret) {
>> >> fi = get_fuse_inode(inode);
>> >> - if (outarg.nodeid != get_node_id(inode) ||
>> >> - (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
>> >> - fuse_queue_forget(fm->fc, forget,
>> >> - outarg.nodeid, 1);
>> >> + if (!fuse_file_handle_is_equal(fm->fc, fi->fh, fh) ||
>> >> + nodeid != get_node_id(inode) ||
>> >> + (bool) IS_AUTOMOUNT(inode) != (bool) (attr.flags & FUSE_ATTR_SUBMOUNT)) {
>> >> + fuse_queue_forget(fm->fc, forget, nodeid, 1);
>> >> + kfree(fh);
>> >> goto invalid;
>> >> }
>> >> spin_lock(&fi->lock);
>> >> fi->nlookup++;
>> >> spin_unlock(&fi->lock);
>> >> }
>> >> + kfree(fh);
>> >> kfree(forget);
>> >> if (ret == -ENOMEM || ret == -EINTR)
>> >> goto out;
>> >> - if (ret || fuse_invalid_attr(&outarg.attr) ||
>> >> - fuse_stale_inode(inode, outarg.generation, &outarg.attr))
>> >> + if (ret || fuse_invalid_attr(&attr) ||
>> >> + fuse_stale_inode(inode, generation, &attr))
>> >> goto invalid;
>> >>
>> >> forget_all_cached_acls(inode);
>> >> - fuse_change_attributes(inode, &outarg.attr, NULL,
>> >> - ATTR_TIMEOUT(&outarg),
>> >> + fuse_change_attributes(inode, &attr, NULL, attr_valid,
>> >> attr_version);
>> >> - fuse_change_entry_timeout(entry, &outarg);
>> >> + fuse_dentry_settime(entry, attr_valid);
>> >> } else if (inode) {
>> >> fi = get_fuse_inode(inode);
>> >> if (flags & LOOKUP_RCU) {
>> >> @@ -546,8 +590,215 @@ bool fuse_invalid_attr(struct fuse_attr *attr)
>> >> return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
>> >> }
>> >>
>> >> -int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
>> >> - u64 *time, struct inode **inode)
>> >> +static int create_ext_handle(struct fuse_in_arg *ext, struct fuse_inode *fi)
>> >> +{
>> >> + struct fuse_ext_header *xh;
>> >> + struct fuse_file_handle *fh;
>> >> + u32 len;
>> >> +
>> >> + len = fuse_ext_size(sizeof(*fi->fh) + fi->fh->size);
>> >> + xh = fuse_extend_arg(ext, len);
>> >> + if (!xh)
>> >> + return -ENOMEM;
>> >> +
>> >> + xh->size = len;
>> >> + xh->type = FUSE_EXT_HANDLE;
>> >> + fh = (struct fuse_file_handle *)&xh[1];
>> >> + fh->size = fi->fh->size;
>> >> + memcpy(fh->handle, fi->fh->handle, fh->size);
>> >> +
>> >> + return 0;
>> >> +}
>> >> +
>> >> +static int fuse_lookup_handle_init(struct fuse_args *args, u64 nodeid,
>> >> + struct fuse_inode *fi,
>> >> + const struct qstr *name,
>> >> + struct fuse_entry2_out *outarg)
>> >> +{
>> >> + struct fuse_file_handle *fh;
>> >
>> > Considering that fuse has long used uint64_t fh as the convention
>> > for a file id all over the code, it would be better to pick a different
>> > convention for fuse file handle, perhaps ffh, or fhandle?
>>
>> Good point, I'll make sure next revision will follow a different
>> convention.
>>
>> >> + size_t fh_size = sizeof(*fh) + MAX_HANDLE_SZ;
>> >
>> > I don't remember what we concluded last time, but
>> > shouldn't the server request max_handle_sz at init?
>> > This constant is quite arbitrary.
>>
>> You're right, I should have pointed that out in the cover letter at least.
>> In the previous version that maximum size was indeed provided by the
>> server. But from the discussion here [0] I understood that this
>> negotiation should be dropped. Here's what Miklos suggested:
>>
>> > How about allocating variable length arguments on demand? That would
>> > allow getting rid of max_handle_size negotiation.
>> >
>> > args->out_var_alloc = true;
>> > args->out_args[1].size = MAX_HANDLE_SZ;
>> > args->out_args[1].value = NULL; /* Will be allocated to the actual size of the handle */
>>
>> Obviously that's not what the code is currently doing. The plan is to
>> eventually set the .value to NULL and do the allocation elsewhere,
>> according to the actual size returned.
>>
>> Because I haven't yet thought about how/where the allocation could be done
>> instead, this code is currently simplifying things, and that's why I
>> picked this MAX_HANDLE_SZ.
>>
>> Sorry, I should have pointed that out in a comment as well.
>>
>> [0] https://lore.kernel.org/all/CAJfpegszP+2XA=vADK4r09KU30BQd-r9sNu2Dog88yLG8iV7WQ@mail.gmail.com
>>
>> >> + int ret = -ENOMEM;
>> >> +
>> >> + fh = kzalloc(fh_size, GFP_KERNEL);
>> >> + if (!fh)
>> >> + return ret;
>> >> +
>> >> + memset(outarg, 0, sizeof(struct fuse_entry2_out));
>> >> + args->opcode = FUSE_LOOKUP_HANDLE;
>> >> + args->nodeid = nodeid;
>> >> + args->in_numargs = 3;
>> >> + fuse_set_zero_arg0(args);
>> >> + args->in_args[1].size = name->len;
>> >> + args->in_args[1].value = name->name;
>> >> + args->in_args[2].size = 1;
>> >> + args->in_args[2].value = "";
>> >> + if (fi && fi->fh) {
>> >
>> > Same here fi->ffh? or fi->fhandle
>>
>> Ack!
>>
>> >> + args->is_ext = true;
>> >> + args->ext_idx = args->in_numargs++;
>> >> + args->in_args[args->ext_idx].size = 0;
>> >> + ret = create_ext_handle(&args->in_args[args->ext_idx], fi);
>> >> + if (ret) {
>> >> + kfree(fh);
>> >> + return ret;
>> >> + }
>> >> + }
>> >> + args->out_numargs = 2;
>> >> + args->out_argvar = true;
>> >> + args->out_argvar_idx = 1;
>> >> + args->out_args[0].size = sizeof(struct fuse_entry2_out);
>> >> + args->out_args[0].value = outarg;
>> >> +
>> >> + /* XXX do allocation to the actual size of the handle */
>> >> + args->out_args[1].size = fh_size;
>> >> + args->out_args[1].value = fh;
>> >> +
>> >> + return 0;
>> >> +}
>> >> +
>> >> +static void fuse_req_free_argvar_ext(struct fuse_args *args)
>> >> +{
>> >> + if (args->out_argvar)
>> >> + kfree(args->out_args[args->out_argvar_idx].value);
>> >> + if (args->is_ext)
>> >> + kfree(args->in_args[args->ext_idx].value);
>> >> +}
>> >> +
>> >
>> > Just wanted to point out that statx_out is > 256 bytes on stack
>> > so allocating 127+4 and the added complexity of ext arg
>> > seem awkward.
>> >
>> > Unless we really want to support huge file handles (we don't?)
>> > maybe the allocation can be restricted to fi->handle?
>> > Not sure.
>>
>> If I understand you correctly, you're suggesting that the out_arg that
>> will return the handle should be handled on the stack as well and then it
>> would be copied to an allocated fi->handle. Sure, that can be done.
>>
>> On the other hand, as I mentioned above, the outarg allocation is just a
>> simplification. So maybe the actual allocation of the handle may be done
>> elsewhere with the _actual_ fh size, and then simply used in fh->handle.
>>
>> Please let me know if I got your comment right.
>> (And thanks for the comments, by the way!)
>
> file handle on stack only makes sense for small pre allocated size.
> If the server has full control over handle size, then that is not relevant.
>
> At some point we will need to address the fact that the most common
> case is for very small file handles.
>
> In struct fanotify_fid_event, we used a small inline buffer to optimize this
> case. This could also be done for fuse_inode::handle, but we can worry about
> that later.
Thanks, I had taken a look at it before -- I think you had pointed me to
it! But I agree that this is something that can be handled once I have
most of the other things sorted out.
Cheers,
--
Luís
next prev parent reply other threads:[~2026-02-26 10:33 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-25 11:24 [RFC PATCH v3 0/8] fuse: LOOKUP_HANDLE operation Luis Henriques
2026-02-25 11:24 ` [RFC PATCH v3 1/8] fuse: simplify fuse_lookup_name() interface Luis Henriques
2026-02-27 15:46 ` Miklos Szeredi
2026-02-28 14:42 ` Luis Henriques
2026-02-25 11:24 ` [RFC PATCH v3 2/8] fuse: export extend_arg() and factor out fuse_ext_size() Luis Henriques
2026-02-25 11:24 ` [RFC PATCH v3 3/8] fuse: store index of the variable length argument Luis Henriques
2026-02-27 15:41 ` Miklos Szeredi
2026-02-28 14:50 ` Luis Henriques
2026-02-25 11:24 ` [RFC PATCH v3 4/8] fuse: drop unnecessary argument from fuse_lookup_init() Luis Henriques
2026-02-27 15:57 ` Miklos Szeredi
2026-02-25 11:24 ` [RFC PATCH v3 5/8] fuse: extract helper functions from fuse_do_statx() Luis Henriques
2026-02-25 11:24 ` [RFC PATCH v3 6/8] fuse: implementation of lookup_handle+statx compound operation Luis Henriques
2026-02-25 18:06 ` Amir Goldstein
2026-02-26 9:54 ` Luis Henriques
2026-02-26 10:08 ` Amir Goldstein
2026-02-26 10:29 ` Miklos Szeredi
2026-02-26 15:06 ` Luis Henriques
2026-02-26 15:44 ` Miklos Szeredi
2026-02-26 16:17 ` Luis Henriques
2026-02-26 10:33 ` Luis Henriques [this message]
2026-02-25 11:24 ` [RFC PATCH v3 7/8] fuse: export fuse_open_args_fill() helper function Luis Henriques
2026-02-25 11:24 ` [RFC PATCH v3 8/8] fuse: implementation of mkobj_handle+statx+open compound operation Luis Henriques
2026-02-25 15:08 ` Horst Birthelmer
2026-02-25 17:26 ` Luis Henriques
2026-02-25 15:14 ` [RFC PATCH v3 0/8] fuse: LOOKUP_HANDLE operation Horst Birthelmer
2026-02-25 17:06 ` Luis Henriques
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87v7fj7q1u.fsf@wotan.olymp \
--to=luis@igalia.com \
--cc=amir73il@gmail.com \
--cc=bernd@bsbernd.com \
--cc=bschubert@ddn.com \
--cc=djwong@kernel.org \
--cc=hbirthelmer@ddn.com \
--cc=joannelkoong@gmail.com \
--cc=kchen@ddn.com \
--cc=kernel-dev@igalia.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mharvey@jumptrading.com \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox