From: Bernd Schubert <bernd.schubert@fastmail.fm>
To: Joanne Koong <joannelkoong@gmail.com>, miklos@szeredi.hu
Cc: linux-fsdevel@vger.kernel.org, jlayton@kernel.org, kernel-team@meta.com
Subject: Re: [PATCH v1] fuse: use splice for reading user pages on servers that enable it
Date: Mon, 21 Apr 2025 13:49:05 +0200 [thread overview]
Message-ID: <5332002a-e444-4f62-8217-8d124182290d@fastmail.fm> (raw)
In-Reply-To: <20250419000614.3795331-1-joannelkoong@gmail.com>
On 4/19/25 02:06, Joanne Koong wrote:
> For direct io writes, splice is disabled when forwarding pages from the
> client to the server. This is because the pages in the pipe buffer are
> user pages, which is controlled by the client. Thus if a server replies
> to the request and then keeps accessing the pages afterwards, there is
> the possibility that the client may have modified the contents of the
> pages in the meantime. More context on this can be found in commit
> 0c4bcfdecb1a ("fuse: fix pipe buffer lifetime for direct_io").
>
> For servers that do not need to access pages after answering the
> request, splice gives a non-trivial improvement in performance.
> Benchmarks show roughly a 40% speedup.
>
> Allow servers to enable zero-copy splice for servicing client direct io
> writes. By enabling this, the server understands that they should not
> continue accessing the pipe buffer after completing the request or may
> face incorrect data if they do so.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
> fs/fuse/dev.c | 18 ++++++++++--------
> fs/fuse/dev_uring.c | 4 ++--
> fs/fuse/fuse_dev_i.h | 5 +++--
> fs/fuse/fuse_i.h | 3 +++
> fs/fuse/inode.c | 5 ++++-
> include/uapi/linux/fuse.h | 8 ++++++++
> 6 files changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 67d07b4c778a..1b0ea8593f74 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -816,12 +816,13 @@ static int unlock_request(struct fuse_req *req)
> return err;
> }
>
> -void fuse_copy_init(struct fuse_copy_state *cs, bool write,
> - struct iov_iter *iter)
> +void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
> + bool write, struct iov_iter *iter)
> {
> memset(cs, 0, sizeof(*cs));
> cs->write = write;
> cs->iter = iter;
> + cs->splice_read_user_pages = fc->splice_read_user_pages;
> }
>
> /* Unmap and put previous page of userspace buffer */
> @@ -1105,9 +1106,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
> if (cs->write && cs->pipebufs && page) {
> /*
> * Can't control lifetime of pipe buffers, so always
> - * copy user pages.
> + * copy user pages if server does not support splice
> + * for reading user pages.
> */
> - if (cs->req->args->user_pages) {
> + if (cs->req->args->user_pages && !cs->splice_read_user_pages) {
> err = fuse_copy_fill(cs);
> if (err)
> return err;
> @@ -1538,7 +1540,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
> if (!user_backed_iter(to))
> return -EINVAL;
>
> - fuse_copy_init(&cs, true, to);
> + fuse_copy_init(&cs, fud->fc, true, to);
>
> return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
> }
> @@ -1561,7 +1563,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
> if (!bufs)
> return -ENOMEM;
>
> - fuse_copy_init(&cs, true, NULL);
> + fuse_copy_init(&cs, fud->fc, true, NULL);
> cs.pipebufs = bufs;
> cs.pipe = pipe;
> ret = fuse_dev_do_read(fud, in, &cs, len);
> @@ -2227,7 +2229,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
> if (!user_backed_iter(from))
> return -EINVAL;
>
> - fuse_copy_init(&cs, false, from);
> + fuse_copy_init(&cs, fud->fc, false, from);
>
> return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
> }
> @@ -2301,7 +2303,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
> }
> pipe_unlock(pipe);
>
> - fuse_copy_init(&cs, false, NULL);
> + fuse_copy_init(&cs, fud->fc, false, NULL);
> cs.pipebufs = bufs;
> cs.nr_segs = nbuf;
> cs.pipe = pipe;
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index ef470c4a9261..52b883a6a79d 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -593,7 +593,7 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
> if (err)
> return err;
>
> - fuse_copy_init(&cs, false, &iter);
> + fuse_copy_init(&cs, ring->fc, false, &iter);
> cs.is_uring = true;
> cs.req = req;
>
> @@ -623,7 +623,7 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
> return err;
> }
>
> - fuse_copy_init(&cs, true, &iter);
> + fuse_copy_init(&cs, ring->fc, true, &iter);
> cs.is_uring = true;
> cs.req = req;
>
> diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
> index db136e045925..25e593e64c67 100644
> --- a/fs/fuse/fuse_dev_i.h
> +++ b/fs/fuse/fuse_dev_i.h
> @@ -32,6 +32,7 @@ struct fuse_copy_state {
> bool write:1;
> bool move_pages:1;
> bool is_uring:1;
> + bool splice_read_user_pages:1;
> struct {
> unsigned int copied_sz; /* copied size into the user buffer */
> } ring;
> @@ -51,8 +52,8 @@ struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique);
>
> void fuse_dev_end_requests(struct list_head *head);
>
> -void fuse_copy_init(struct fuse_copy_state *cs, bool write,
> - struct iov_iter *iter);
> +void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *conn,
> + bool write, struct iov_iter *iter);
> int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs,
> unsigned int argpages, struct fuse_arg *args,
> int zeroing);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 3d5289cb82a5..e21875f16220 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -898,6 +898,9 @@ struct fuse_conn {
> /* Use io_uring for communication */
> bool io_uring:1;
>
> + /* Allow splice for reading user pages */
> + bool splice_read_user_pages:1;
> +
> /** Maximum stack depth for passthrough backing files */
> int max_stack_depth;
>
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 43b6643635ee..e82e96800fde 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -1439,6 +1439,9 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
>
> if (flags & FUSE_REQUEST_TIMEOUT)
> timeout = arg->request_timeout;
> +
> + if (flags & FUSE_SPLICE_READ_USER_PAGES)
> + fc->splice_read_user_pages = true;
Shouldn't that check for capable(CAP_SYS_ADMIN)? Isn't the issue
that one can access file content although the write is already
marked as completed? I.e. a fuse file system might get data
it was never exposed to and possibly secret data?
A more complex version version could check for CAP_SYS_ADMIN, but
also allow later on read/write to files that have the same uid as
the fuser-server process?
Thanks,
Bernd
> } else {
> ra_pages = fc->max_read / PAGE_SIZE;
> fc->no_lock = true;
> @@ -1489,7 +1492,7 @@ void fuse_send_init(struct fuse_mount *fm)
> FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
> FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
> FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_ALLOW_IDMAP |
> - FUSE_REQUEST_TIMEOUT;
> + FUSE_REQUEST_TIMEOUT | FUSE_SPLICE_READ_USER_PAGES;
> #ifdef CONFIG_FUSE_DAX
> if (fm->fc->dax)
> flags |= FUSE_MAP_ALIGNMENT;
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 122d6586e8d4..b35f6bbcb322 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -235,6 +235,9 @@
> *
> * 7.44
> * - add FUSE_NOTIFY_INC_EPOCH
> + *
> + * 7.45
> + * - add FUSE_SPLICE_READ_USER_PAGES
> */
>
> #ifndef _LINUX_FUSE_H
> @@ -443,6 +446,10 @@ struct fuse_file_lock {
> * FUSE_OVER_IO_URING: Indicate that client supports io-uring
> * FUSE_REQUEST_TIMEOUT: kernel supports timing out requests.
> * init_out.request_timeout contains the timeout (in secs)
> + * FUSE_SPLICE_READ_USER_PAGES: kernel supports splice on the device for reading
> + * user pages. If the server enables this, then the
> + * server should not access the pipe buffer after
> + * completing the request.
> */
> #define FUSE_ASYNC_READ (1 << 0)
> #define FUSE_POSIX_LOCKS (1 << 1)
> @@ -490,6 +497,7 @@ struct fuse_file_lock {
> #define FUSE_ALLOW_IDMAP (1ULL << 40)
> #define FUSE_OVER_IO_URING (1ULL << 41)
> #define FUSE_REQUEST_TIMEOUT (1ULL << 42)
> +#define FUSE_SPLICE_READ_USER_PAGES (1ULL << 43)
>
> /**
> * CUSE INIT request/reply flags
next prev parent reply other threads:[~2025-04-21 11:49 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-19 0:06 [PATCH v1] fuse: use splice for reading user pages on servers that enable it Joanne Koong
2025-04-21 11:49 ` Bernd Schubert [this message]
2025-04-21 12:35 ` Jeff Layton
2025-04-21 21:38 ` Bernd Schubert
2025-04-22 0:49 ` Joanne Koong
2025-04-22 11:37 ` Jeff Layton
2025-04-22 17:41 ` Joanne Koong
2025-04-22 11:33 ` Jeff Layton
2025-04-22 17:49 ` Joanne Koong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5332002a-e444-4f62-8217-8d124182290d@fastmail.fm \
--to=bernd.schubert@fastmail.fm \
--cc=jlayton@kernel.org \
--cc=joannelkoong@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox