From: "Darrick J. Wong" <djwong@kernel.org>
To: Joanne Koong <joannelkoong@gmail.com>
Cc: brauner@kernel.org, miklos@szeredi.hu, hch@infradead.org,
hsiangkao@linux.alibaba.com, linux-block@vger.kernel.org,
gfs2@lists.linux.dev, linux-fsdevel@vger.kernel.org,
kernel-team@meta.com, linux-xfs@vger.kernel.org,
linux-doc@vger.kernel.org
Subject: Re: [PATCH v3 14/15] fuse: use iomap for readahead
Date: Thu, 18 Sep 2025 15:35:26 -0700 [thread overview]
Message-ID: <20250918223526.GA1587915@frogsfrogsfrogs> (raw)
In-Reply-To: <20250916234425.1274735-15-joannelkoong@gmail.com>
On Tue, Sep 16, 2025 at 04:44:24PM -0700, Joanne Koong wrote:
> Do readahead in fuse using iomap. This gives us granular uptodate
> tracking for large folios, which optimizes how much data needs to be
> read in. If some portions of the folio are already uptodate (e.g., through
> a prior write), we only need to read in the non-uptodate portions.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Looks generally ok to me,
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> fs/fuse/file.c | 220 ++++++++++++++++++++++++++++---------------------
> 1 file changed, 124 insertions(+), 96 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 4f27a3b0c20a..db0b1f20fee4 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -844,8 +844,65 @@ static const struct iomap_ops fuse_iomap_ops = {
>
> struct fuse_fill_read_data {
> struct file *file;
> +
> + /* Fields below are used if sending the read request asynchronously */
> + struct fuse_conn *fc;
> + struct fuse_io_args *ia;
> + unsigned int nr_bytes;
> };
>
> +/* forward declarations */
> +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
> + unsigned len, struct fuse_args_pages *ap,
> + unsigned cur_bytes, bool write);
> +static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
> + unsigned int count, bool async);
> +
> +static int fuse_handle_readahead(struct folio *folio,
> + struct readahead_control *rac,
> + struct fuse_fill_read_data *data, loff_t pos,
> + size_t len)
> +{
> + struct fuse_io_args *ia = data->ia;
> + size_t off = offset_in_folio(folio, pos);
> + struct fuse_conn *fc = data->fc;
> + struct fuse_args_pages *ap;
> + unsigned int nr_pages;
> +
> + if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes,
> + false)) {
> + fuse_send_readpages(ia, data->file, data->nr_bytes,
> + fc->async_read);
> + data->nr_bytes = 0;
> + data->ia = NULL;
> + ia = NULL;
> + }
> + if (!ia) {
> + if (fc->num_background >= fc->congestion_threshold &&
> + rac->ra->async_size >= readahead_count(rac))
> + /*
> + * Congested and only async pages left, so skip the
> + * rest.
> + */
> + return -EAGAIN;
> +
> + nr_pages = min(fc->max_pages, readahead_count(rac));
> + data->ia = fuse_io_alloc(NULL, nr_pages);
> + if (!data->ia)
> + return -ENOMEM;
> + ia = data->ia;
> + }
> + folio_get(folio);
> + ap = &ia->ap;
> + ap->folios[ap->num_folios] = folio;
> + ap->descs[ap->num_folios].offset = off;
> + ap->descs[ap->num_folios].length = len;
> + data->nr_bytes += len;
> + ap->num_folios++;
> +
> + return 0;
> +}
> +
> static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
> struct iomap_read_folio_ctx *ctx,
> size_t len)
> @@ -857,18 +914,40 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
> struct file *file = data->file;
> int ret;
>
> - /*
> - * for non-readahead read requests, do reads synchronously since
> - * it's not guaranteed that the server can handle out-of-order reads
> - */
> iomap_start_folio_read(folio, len);
> - ret = fuse_do_readfolio(file, folio, off, len);
> - iomap_finish_folio_read(folio, off, len, ret);
> + if (ctx->rac) {
> + ret = fuse_handle_readahead(folio, ctx->rac, data, pos, len);
> + /*
> + * If fuse_handle_readahead was successful, fuse_readpages_end
> + * will do the iomap_finish_folio_read, else we need to call it
> + * here
> + */
> + if (ret)
> + iomap_finish_folio_read(folio, off, len, ret);
> + } else {
> + /*
> + * for non-readahead read requests, do reads synchronously
> + * since it's not guaranteed that the server can handle
> + * out-of-order reads
> + */
> + ret = fuse_do_readfolio(file, folio, off, len);
> + iomap_finish_folio_read(folio, off, len, ret);
> + }
> return ret;
> }
>
> +static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx)
> +{
> + struct fuse_fill_read_data *data = ctx->read_ctx;
> +
> + if (data->ia)
> + fuse_send_readpages(data->ia, data->file, data->nr_bytes,
> + data->fc->async_read);
> +}
> +
> static const struct iomap_read_ops fuse_iomap_read_ops = {
> .read_folio_range = fuse_iomap_read_folio_range_async,
> + .submit_read = fuse_iomap_read_submit,
> };
>
> static int fuse_read_folio(struct file *file, struct folio *folio)
> @@ -930,7 +1009,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
> }
>
> for (i = 0; i < ap->num_folios; i++) {
> - folio_end_read(ap->folios[i], !err);
> + iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset,
> + ap->descs[i].length, err);
> folio_put(ap->folios[i]);
> }
> if (ia->ff)
> @@ -940,7 +1020,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
> }
>
> static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
> - unsigned int count)
> + unsigned int count, bool async)
> {
> struct fuse_file *ff = file->private_data;
> struct fuse_mount *fm = ff->fm;
> @@ -962,7 +1042,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
>
> fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
> ia->read.attr_ver = fuse_get_attr_version(fm->fc);
> - if (fm->fc->async_read) {
> + if (async) {
> ia->ff = fuse_file_get(ff);
> ap->args.end = fuse_readpages_end;
> err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
> @@ -979,81 +1059,20 @@ static void fuse_readahead(struct readahead_control *rac)
> {
> struct inode *inode = rac->mapping->host;
> struct fuse_conn *fc = get_fuse_conn(inode);
> - unsigned int max_pages, nr_pages;
> - struct folio *folio = NULL;
> + struct fuse_fill_read_data data = {
> + .file = rac->file,
> + .fc = fc,
> + };
> + struct iomap_read_folio_ctx ctx = {
> + .ops = &fuse_iomap_read_ops,
> + .rac = rac,
> + .read_ctx = &data
> + };
>
> if (fuse_is_bad(inode))
> return;
>
> - max_pages = min_t(unsigned int, fc->max_pages,
> - fc->max_read / PAGE_SIZE);
> -
> - /*
> - * This is only accurate the first time through, since readahead_folio()
> - * doesn't update readahead_count() from the previous folio until the
> - * next call. Grab nr_pages here so we know how many pages we're going
> - * to have to process. This means that we will exit here with
> - * readahead_count() == folio_nr_pages(last_folio), but we will have
> - * consumed all of the folios, and read_pages() will call
> - * readahead_folio() again which will clean up the rac.
> - */
> - nr_pages = readahead_count(rac);
> -
> - while (nr_pages) {
> - struct fuse_io_args *ia;
> - struct fuse_args_pages *ap;
> - unsigned cur_pages = min(max_pages, nr_pages);
> - unsigned int pages = 0;
> -
> - if (fc->num_background >= fc->congestion_threshold &&
> - rac->ra->async_size >= readahead_count(rac))
> - /*
> - * Congested and only async pages left, so skip the
> - * rest.
> - */
> - break;
> -
> - ia = fuse_io_alloc(NULL, cur_pages);
> - if (!ia)
> - break;
> - ap = &ia->ap;
> -
> - while (pages < cur_pages) {
> - unsigned int folio_pages;
> -
> - /*
> - * This returns a folio with a ref held on it.
> - * The ref needs to be held until the request is
> - * completed, since the splice case (see
> - * fuse_try_move_page()) drops the ref after it's
> - * replaced in the page cache.
> - */
> - if (!folio)
> - folio = __readahead_folio(rac);
> -
> - folio_pages = folio_nr_pages(folio);
> - if (folio_pages > cur_pages - pages) {
> - /*
> - * Large folios belonging to fuse will never
> - * have more pages than max_pages.
> - */
> - WARN_ON(!pages);
> - break;
> - }
> -
> - ap->folios[ap->num_folios] = folio;
> - ap->descs[ap->num_folios].length = folio_size(folio);
> - ap->num_folios++;
> - pages += folio_pages;
> - folio = NULL;
> - }
> - fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT);
> - nr_pages -= pages;
> - }
> - if (folio) {
> - folio_end_read(folio, false);
> - folio_put(folio);
> - }
> + iomap_readahead(&fuse_iomap_ops, &ctx);
> }
>
> static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
> @@ -2084,7 +2103,7 @@ struct fuse_fill_wb_data {
> struct fuse_file *ff;
> unsigned int max_folios;
> /*
> - * nr_bytes won't overflow since fuse_writepage_need_send() caps
> + * nr_bytes won't overflow since fuse_folios_need_send() caps
> * wb requests to never exceed fc->max_pages (which has an upper bound
> * of U16_MAX).
> */
> @@ -2129,14 +2148,15 @@ static void fuse_writepages_send(struct inode *inode,
> spin_unlock(&fi->lock);
> }
>
> -static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> - unsigned len, struct fuse_args_pages *ap,
> - struct fuse_fill_wb_data *data)
> +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
> + unsigned len, struct fuse_args_pages *ap,
> + unsigned cur_bytes, bool write)
> {
> struct folio *prev_folio;
> struct fuse_folio_desc prev_desc;
> - unsigned bytes = data->nr_bytes + len;
> + unsigned bytes = cur_bytes + len;
> loff_t prev_pos;
> + size_t max_bytes = write ? fc->max_write : fc->max_read;
>
> WARN_ON(!ap->num_folios);
>
> @@ -2144,8 +2164,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
> return true;
>
> - /* Reached max write bytes */
> - if (bytes > fc->max_write)
> + if (bytes > max_bytes)
> return true;
>
> /* Discontinuity */
> @@ -2155,11 +2174,6 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> if (prev_pos != pos)
> return true;
>
> - /* Need to grow the pages array? If so, did the expansion fail? */
> - if (ap->num_folios == data->max_folios &&
> - !fuse_pages_realloc(data, fc->max_pages))
> - return true;
> -
> return false;
> }
>
> @@ -2183,10 +2197,24 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
> return -EIO;
> }
>
> - if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) {
> - fuse_writepages_send(inode, data);
> - data->wpa = NULL;
> - data->nr_bytes = 0;
> + if (wpa) {
> + bool send = fuse_folios_need_send(fc, pos, len, ap,
> + data->nr_bytes, true);
> +
> + if (!send) {
> + /*
> + * Need to grow the pages array? If so, did the
> + * expansion fail?
> + */
> + send = (ap->num_folios == data->max_folios) &&
> + !fuse_pages_realloc(data, fc->max_pages);
> + }
> +
> + if (send) {
> + fuse_writepages_send(inode, data);
> + data->wpa = NULL;
> + data->nr_bytes = 0;
> + }
> }
>
> if (data->wpa == NULL) {
> --
> 2.47.3
>
>
next prev parent reply other threads: [~2025-09-18 22:35 UTC | newest]
Thread overview: 40+ messages / expand [flat | nested] mbox.gz Atom feed top
2025-09-16 23:44 [PATCH v3 00/15] fuse: use iomap for buffered reads + readahead Joanne Koong
2025-09-16 23:44 ` [PATCH v3 01/15] iomap: move bio read logic into helper function Joanne Koong
2025-09-18 21:27 ` Darrick J. Wong
2025-09-16 23:44 ` [PATCH v3 02/15] iomap: move read/readahead bio submission " Joanne Koong
2025-09-18 21:27 ` Darrick J. Wong
2025-09-16 23:44 ` [PATCH v3 03/15] iomap: store read/readahead bio generically Joanne Koong
2025-09-18 21:29 ` Darrick J. Wong
2025-09-16 23:44 ` [PATCH v3 04/15] iomap: iterate over entire folio in iomap_readpage_iter() Joanne Koong
2025-09-18 21:37 ` Darrick J. Wong
2025-09-22 22:33 ` Joanne Koong
2025-09-16 23:44 ` [PATCH v3 05/15] iomap: rename iomap_readpage_iter() to iomap_read_folio_iter() Joanne Koong
2025-09-16 23:44 ` [PATCH v3 06/15] iomap: rename iomap_readpage_ctx struct to iomap_read_folio_ctx Joanne Koong
2025-09-16 23:44 ` [PATCH v3 07/15] iomap: track read/readahead folio ownership internally Joanne Koong
2025-09-18 21:49 ` Darrick J. Wong
2025-09-19 18:14 ` Joanne Koong
2025-09-16 23:44 ` [PATCH v3 08/15] iomap: add public start/finish folio read helpers Joanne Koong
2025-09-16 23:44 ` [PATCH v3 09/15] iomap: add caller-provided callbacks for read and readahead Joanne Koong
2025-09-16 23:44 ` [PATCH v3 10/15] iomap: add bias for async read requests Joanne Koong
2025-09-18 22:30 ` Darrick J. Wong
2025-09-19 18:34 ` Joanne Koong
2025-09-22 18:33 ` Christoph Hellwig
2025-09-22 20:54 ` Matthew Wilcox
2025-09-24 22:56 ` Joanne Koong
2025-09-22 23:19 ` Joanne Koong
2025-09-16 23:44 ` [PATCH v3 11/15] iomap: move buffered io bio logic into new file Joanne Koong
2025-09-17 21:40 ` kernel test robot
2025-09-18 22:31 ` Darrick J. Wong
2025-09-19 15:33 ` Christoph Hellwig
2025-09-19 15:32 ` Christoph Hellwig
2025-09-16 23:44 ` [PATCH v3 12/15] iomap: make iomap_read_folio() a void return Joanne Koong
2025-09-18 21:55 ` Darrick J. Wong
2025-09-16 23:44 ` [PATCH v3 13/15] fuse: use iomap for read_folio Joanne Koong
2025-09-16 23:44 ` [PATCH v3 14/15] fuse: use iomap for readahead Joanne Koong
2025-09-18 22:35 ` Darrick J. Wong [this message]
2025-09-16 23:44 ` [PATCH v3 15/15] fuse: remove fc->blkbits workaround for partial writes Joanne Koong
2025-09-18 22:35 ` Darrick J. Wong
2025-09-17 8:30 ` [syzbot ci] Re: fuse: use iomap for buffered reads + readahead syzbot ci
2025-09-17 19:59 ` Joanne Koong
2025-09-18 15:48 ` Aleksandr Nogikh
2025-09-18 21:15 ` Joanne Koong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250918223526.GA1587915@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=brauner@kernel.org \
--cc=gfs2@lists.linux.dev \
--cc=hch@infradead.org \
--cc=hsiangkao@linux.alibaba.com \
--cc=joannelkoong@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).