From: "Darrick J. Wong" <djwong@kernel.org>
To: Joanne Koong <joannelkoong@gmail.com>
Cc: brauner@kernel.org, miklos@szeredi.hu, hch@infradead.org,
linux-fsdevel@vger.kernel.org, kernel-team@meta.com,
linux-xfs@vger.kernel.org, linux-doc@vger.kernel.org
Subject: Re: [PATCH v1 15/16] fuse: use iomap for readahead
Date: Wed, 3 Sep 2025 14:17:54 -0700 [thread overview]
Message-ID: <20250903211754.GW1587915@frogsfrogsfrogs> (raw)
In-Reply-To: <20250829235627.4053234-16-joannelkoong@gmail.com>
On Fri, Aug 29, 2025 at 04:56:26PM -0700, Joanne Koong wrote:
> Do readahead in fuse using iomap. This gives us granular uptodate
> tracking for large folios, which optimizes how much data needs to be
> read in. If some portions of the folio are already uptodate (eg through
> a prior write), we only need to read in the non-uptodate portions.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
> fs/fuse/file.c | 214 +++++++++++++++++++++++++++----------------------
> 1 file changed, 118 insertions(+), 96 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index bdfb13cdee4b..1659603f4cb6 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -844,8 +844,73 @@ static const struct iomap_ops fuse_iomap_ops = {
>
> struct fuse_fill_read_data {
> struct file *file;
> + /*
> + * We need to track this because non-readahead requests can't be sent
> + * asynchronously.
> + */
> + bool readahead : 1;
> +
> + /*
> + * Fields below are used if sending the read request
> + * asynchronously.
> + */
> + struct fuse_conn *fc;
> + struct readahead_control *rac;
> + struct fuse_io_args *ia;
> + unsigned int nr_bytes;
> };
>
> +/* forward declarations */
> +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
> + unsigned len, struct fuse_args_pages *ap,
> + unsigned cur_bytes, bool write);
> +static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
> + unsigned int count, bool async);
> +
> +static int fuse_handle_readahead(struct folio *folio,
> + struct fuse_fill_read_data *data, loff_t pos,
> + size_t len)
> +{
> + struct fuse_io_args *ia = data->ia;
> + size_t off = offset_in_folio(folio, pos);
> + struct fuse_conn *fc = data->fc;
> + struct fuse_args_pages *ap;
> +
> + if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes,
> + false)) {
> + fuse_send_readpages(ia, data->file, data->nr_bytes,
> + fc->async_read);
> + data->nr_bytes = 0;
> + ia = NULL;
> + }
> + if (!ia) {
> + struct readahead_control *rac = data->rac;
> + unsigned nr_pages = min(fc->max_pages, readahead_count(rac));
> +
> + if (fc->num_background >= fc->congestion_threshold &&
> + rac->ra->async_size >= readahead_count(rac))
> + /*
> + * Congested and only async pages left, so skip the
> + * rest.
> + */
> + return -EAGAIN;
> +
> + data->ia = fuse_io_alloc(NULL, nr_pages);
> + if (!data->ia)
> + return -ENOMEM;
> + ia = data->ia;
> + }
> + folio_get(folio);
> + ap = &ia->ap;
> + ap->folios[ap->num_folios] = folio;
> + ap->descs[ap->num_folios].offset = off;
> + ap->descs[ap->num_folios].length = len;
> + data->nr_bytes += len;
> + ap->num_folios++;
> +
> + return 0;
> +}
> +
> static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
> struct folio *folio, loff_t pos,
> size_t len)
> @@ -855,13 +920,24 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
> size_t off = offset_in_folio(folio, pos);
> int ret;
>
> - /*
> - * for non-readahead read requests, do reads synchronously since
> - * it's not guaranteed that the server can handle out-of-order reads
> - */
> iomap_start_folio_read(folio, len);
> - ret = fuse_do_readfolio(file, folio, off, len);
> - iomap_finish_folio_read(folio, off, len, ret);
> + if (data->readahead) {
> + ret = fuse_handle_readahead(folio, data, pos, len);
> + /*
> + * If fuse_handle_readahead was successful, fuse_readpages_end
> + * will do the iomap_finish_folio_read, else we need to call it
> + * here
> + */
> + if (ret)
> + iomap_finish_folio_read(folio, off, len, ret);
> + } else {
> + /*
> + * for non-readahead read requests, do reads synchronously since
> + * it's not guaranteed that the server can handle out-of-order reads
> + */
> + ret = fuse_do_readfolio(file, folio, off, len);
> + iomap_finish_folio_read(folio, off, len, ret);
> + }
> return ret;
> }
>
> @@ -923,7 +999,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
> }
>
> for (i = 0; i < ap->num_folios; i++) {
> - folio_end_read(ap->folios[i], !err);
> + iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset,
> + ap->descs[i].length, err);
> folio_put(ap->folios[i]);
> }
> if (ia->ff)
> @@ -933,7 +1010,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
> }
>
> static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
> - unsigned int count)
> + unsigned int count, bool async)
> {
> struct fuse_file *ff = file->private_data;
> struct fuse_mount *fm = ff->fm;
> @@ -955,7 +1032,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
>
> fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
> ia->read.attr_ver = fuse_get_attr_version(fm->fc);
> - if (fm->fc->async_read) {
> + if (async) {
> ia->ff = fuse_file_get(ff);
> ap->args.end = fuse_readpages_end;
> err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
> @@ -972,81 +1049,20 @@ static void fuse_readahead(struct readahead_control *rac)
> {
> struct inode *inode = rac->mapping->host;
> struct fuse_conn *fc = get_fuse_conn(inode);
> - unsigned int max_pages, nr_pages;
> - struct folio *folio = NULL;
> + struct fuse_fill_read_data data = {
> + .file = rac->file,
> + .readahead = true,
> + .fc = fc,
> + .rac = rac,
> + };
>
> if (fuse_is_bad(inode))
> return;
>
> - max_pages = min_t(unsigned int, fc->max_pages,
> - fc->max_read / PAGE_SIZE);
> -
> - /*
> - * This is only accurate the first time through, since readahead_folio()
> - * doesn't update readahead_count() from the previous folio until the
> - * next call. Grab nr_pages here so we know how many pages we're going
> - * to have to process. This means that we will exit here with
> - * readahead_count() == folio_nr_pages(last_folio), but we will have
> - * consumed all of the folios, and read_pages() will call
> - * readahead_folio() again which will clean up the rac.
> - */
> - nr_pages = readahead_count(rac);
> -
> - while (nr_pages) {
> - struct fuse_io_args *ia;
> - struct fuse_args_pages *ap;
> - unsigned cur_pages = min(max_pages, nr_pages);
> - unsigned int pages = 0;
> -
> - if (fc->num_background >= fc->congestion_threshold &&
> - rac->ra->async_size >= readahead_count(rac))
> - /*
> - * Congested and only async pages left, so skip the
> - * rest.
> - */
> - break;
> -
> - ia = fuse_io_alloc(NULL, cur_pages);
> - if (!ia)
> - break;
> - ap = &ia->ap;
> -
> - while (pages < cur_pages) {
> - unsigned int folio_pages;
> -
> - /*
> - * This returns a folio with a ref held on it.
> - * The ref needs to be held until the request is
> - * completed, since the splice case (see
> - * fuse_try_move_page()) drops the ref after it's
> - * replaced in the page cache.
> - */
> - if (!folio)
> - folio = __readahead_folio(rac);
> -
> - folio_pages = folio_nr_pages(folio);
> - if (folio_pages > cur_pages - pages) {
> - /*
> - * Large folios belonging to fuse will never
> - * have more pages than max_pages.
> - */
> - WARN_ON(!pages);
> - break;
> - }
> -
> - ap->folios[ap->num_folios] = folio;
> - ap->descs[ap->num_folios].length = folio_size(folio);
> - ap->num_folios++;
> - pages += folio_pages;
> - folio = NULL;
> - }
> - fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT);
> - nr_pages -= pages;
> - }
> - if (folio) {
> - folio_end_read(folio, false);
> - folio_put(folio);
> - }
> + iomap_readahead(rac, &fuse_iomap_ops, &fuse_iomap_read_ops, &data);
> + if (data.ia)
> + fuse_send_readpages(data.ia, data.file, data.nr_bytes,
> + fc->async_read);
> }
>
> static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
> @@ -2077,7 +2093,7 @@ struct fuse_fill_wb_data {
> struct fuse_file *ff;
> unsigned int max_folios;
> /*
> - * nr_bytes won't overflow since fuse_writepage_need_send() caps
> + * nr_bytes won't overflow since fuse_folios_need_send() caps
> * wb requests to never exceed fc->max_pages (which has an upper bound
> * of U16_MAX).
> */
> @@ -2122,14 +2138,15 @@ static void fuse_writepages_send(struct inode *inode,
> spin_unlock(&fi->lock);
> }
>
> -static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> - unsigned len, struct fuse_args_pages *ap,
> - struct fuse_fill_wb_data *data)
> +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
> + unsigned len, struct fuse_args_pages *ap,
> + unsigned cur_bytes, bool write)
> {
> struct folio *prev_folio;
> struct fuse_folio_desc prev_desc;
> - unsigned bytes = data->nr_bytes + len;
> + unsigned bytes = cur_bytes + len;
> loff_t prev_pos;
> + size_t max_bytes = write ? fc->max_write : fc->max_read;
>
> WARN_ON(!ap->num_folios);
>
> @@ -2137,8 +2154,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
> return true;
>
> - /* Reached max write bytes */
> - if (bytes > fc->max_write)
> + if (bytes > max_bytes)
> return true;
>
> /* Discontinuity */
> @@ -2148,11 +2164,6 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> if (prev_pos != pos)
> return true;
>
> - /* Need to grow the pages array? If so, did the expansion fail? */
> - if (ap->num_folios == data->max_folios &&
> - !fuse_pages_realloc(data, fc->max_pages))
> - return true;
> -
> return false;
> }
>
> @@ -2176,10 +2187,21 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
> return -EIO;
> }
>
> - if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) {
> - fuse_writepages_send(inode, data);
> - data->wpa = NULL;
> - data->nr_bytes = 0;
> + if (wpa) {
> + bool send = fuse_folios_need_send(fc, pos, len, ap, data->nr_bytes,
> + true);
> +
> + if (!send) {
> + /* Need to grow the pages array? If so, did the expansion fail? */
> + send = (ap->num_folios == data->max_folios) &&
> + !fuse_pages_realloc(data, fc->max_pages);
> + }
What purpose does this code relocation serve? I gather the idea here is that
writes need to reallocate the pages array, whereas readahead can simply
constrain to whatever's already allocated?
--D
> +
> + if (send) {
> + fuse_writepages_send(inode, data);
> + data->wpa = NULL;
> + data->nr_bytes = 0;
> + }
> }
>
> if (data->wpa == NULL) {
> --
> 2.47.3
>
>
next prev parent reply other threads:[~2025-09-03 21:17 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-29 23:56 [PATCH v1 00/16] fuse: use iomap for buffered reads + readahead Joanne Koong
2025-08-29 23:56 ` [PATCH v1 01/16] iomap: move async bio read logic into helper function Joanne Koong
2025-09-03 20:16 ` Darrick J. Wong
2025-09-04 6:00 ` Christoph Hellwig
2025-09-04 21:44 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 02/16] iomap: rename cur_folio_in_bio to folio_unlocked Joanne Koong
2025-09-03 20:26 ` [PATCH v1 02/16] iomap: rename cur_folio_in_bio to folio_unlocked Darrick J. Wong
2025-09-04 6:03 ` Christoph Hellwig
2025-09-04 22:06 ` Joanne Koong
2025-09-05 15:03 ` Darrick J. Wong
2025-08-29 23:56 ` [PATCH v1 03/16] iomap: refactor read/readahead completion Joanne Koong
2025-09-04 6:05 ` Christoph Hellwig
2025-09-04 23:16 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 04/16] iomap: use iomap_iter->private for stashing read/readahead bio Joanne Koong
2025-09-03 20:30 ` Darrick J. Wong
2025-09-04 6:07 ` Christoph Hellwig
2025-09-04 22:20 ` Joanne Koong
2025-09-04 23:15 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 05/16] iomap: propagate iomap_read_folio() error to caller Joanne Koong
2025-09-03 20:32 ` Darrick J. Wong
2025-09-04 6:09 ` Christoph Hellwig
2025-09-04 21:13 ` Matthew Wilcox
2025-09-22 16:49 ` Joanne Koong
2025-09-23 18:34 ` Darrick J. Wong
2025-09-24 23:12 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 06/16] iomap: move read/readahead logic out of CONFIG_BLOCK guard Joanne Koong
2025-08-29 23:56 ` [PATCH v1 07/16] iomap: iterate through entire folio in iomap_readpage_iter() Joanne Koong
2025-09-03 20:43 ` Darrick J. Wong
2025-09-04 22:37 ` Joanne Koong
2025-09-04 6:14 ` Christoph Hellwig
2025-09-04 22:45 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 08/16] iomap: rename iomap_readpage_iter() to iomap_readfolio_iter() Joanne Koong
2025-09-04 6:15 ` Christoph Hellwig
2025-09-04 22:47 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 09/16] iomap: rename iomap_readpage_ctx struct to iomap_readfolio_ctx Joanne Koong
2025-09-03 20:44 ` Darrick J. Wong
2025-08-29 23:56 ` [PATCH v1 10/16] iomap: add iomap_start_folio_read() helper Joanne Koong
2025-09-03 20:52 ` Darrick J. Wong
2025-08-29 23:56 ` [PATCH v1 11/16] iomap: make start folio read and finish folio read public APIs Joanne Koong
2025-09-03 20:53 ` Darrick J. Wong
2025-09-04 6:15 ` Christoph Hellwig
2025-08-29 23:56 ` [PATCH v1 12/16] iomap: add iomap_read_ops for read and readahead Joanne Koong
2025-09-03 21:08 ` Darrick J. Wong
2025-09-04 20:58 ` Joanne Koong
2025-09-04 6:21 ` Christoph Hellwig
2025-09-04 21:38 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 13/16] iomap: add a private arg " Joanne Koong
2025-08-30 1:54 ` Gao Xiang
2025-09-02 21:24 ` Joanne Koong
2025-09-03 1:55 ` Gao Xiang
2025-09-04 23:29 ` Joanne Koong
2025-09-05 2:21 ` Gao Xiang
2025-09-05 15:21 ` Darrick J. Wong
2025-09-08 6:56 ` Gao Xiang
2025-09-03 21:11 ` Darrick J. Wong
2025-08-29 23:56 ` [PATCH v1 14/16] fuse: use iomap for read_folio Joanne Koong
2025-09-03 21:13 ` Darrick J. Wong
2025-08-29 23:56 ` [PATCH v1 15/16] fuse: use iomap for readahead Joanne Koong
2025-09-03 21:17 ` Darrick J. Wong [this message]
2025-09-04 19:40 ` Joanne Koong
2025-09-04 19:46 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 16/16] fuse: remove fuse_readpages_end() null mapping check Joanne Koong
2025-09-02 9:21 ` Miklos Szeredi
2025-09-02 21:19 ` Joanne Koong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250903211754.GW1587915@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=brauner@kernel.org \
--cc=hch@infradead.org \
--cc=joannelkoong@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox