From: Joanne Koong <joannelkoong@gmail.com>
To: brauner@kernel.org, miklos@szeredi.hu
Cc: hch@infradead.org, djwong@kernel.org,
linux-fsdevel@vger.kernel.org, kernel-team@meta.com,
linux-xfs@vger.kernel.org, linux-doc@vger.kernel.org
Subject: [PATCH v1 15/16] fuse: use iomap for readahead
Date: Fri, 29 Aug 2025 16:56:26 -0700 [thread overview]
Message-ID: <20250829235627.4053234-16-joannelkoong@gmail.com> (raw)
In-Reply-To: <20250829235627.4053234-1-joannelkoong@gmail.com>
Do readahead in fuse using iomap. This gives us granular uptodate
tracking for large folios, which reduces how much data needs to be
read in: if some portions of a folio are already uptodate (e.g. through
a prior write), only the non-uptodate portions need to be read.
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
fs/fuse/file.c | 214 +++++++++++++++++++++++++++----------------------
1 file changed, 118 insertions(+), 96 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index bdfb13cdee4b..1659603f4cb6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -844,8 +844,73 @@ static const struct iomap_ops fuse_iomap_ops = {
struct fuse_fill_read_data {
struct file *file;
+ /*
+ * We need to track this because non-readahead requests can't be sent
+ * asynchronously.
+ */
+ bool readahead : 1;
+
+ /*
+ * Fields below are used if sending the read request
+ * asynchronously.
+ */
+ struct fuse_conn *fc;
+ struct readahead_control *rac;
+ struct fuse_io_args *ia;
+ unsigned int nr_bytes;
};
+/* forward declarations */
+static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
+ unsigned len, struct fuse_args_pages *ap,
+ unsigned cur_bytes, bool write);
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
+ unsigned int count, bool async);
+
+static int fuse_handle_readahead(struct folio *folio,
+ struct fuse_fill_read_data *data, loff_t pos,
+ size_t len)
+{
+ struct fuse_io_args *ia = data->ia;
+ size_t off = offset_in_folio(folio, pos);
+ struct fuse_conn *fc = data->fc;
+ struct fuse_args_pages *ap;
+
+ if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes,
+ false)) {
+ fuse_send_readpages(ia, data->file, data->nr_bytes,
+ fc->async_read);
+ data->nr_bytes = 0;
+ ia = NULL;
+ }
+ if (!ia) {
+ struct readahead_control *rac = data->rac;
+ unsigned nr_pages = min(fc->max_pages, readahead_count(rac));
+
+ if (fc->num_background >= fc->congestion_threshold &&
+ rac->ra->async_size >= readahead_count(rac))
+ /*
+ * Congested and only async pages left, so skip the
+ * rest.
+ */
+ return -EAGAIN;
+
+ data->ia = fuse_io_alloc(NULL, nr_pages);
+ if (!data->ia)
+ return -ENOMEM;
+ ia = data->ia;
+ }
+ folio_get(folio);
+ ap = &ia->ap;
+ ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].offset = off;
+ ap->descs[ap->num_folios].length = len;
+ data->nr_bytes += len;
+ ap->num_folios++;
+
+ return 0;
+}
+
static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
struct folio *folio, loff_t pos,
size_t len)
@@ -855,13 +920,24 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
size_t off = offset_in_folio(folio, pos);
int ret;
- /*
- * for non-readahead read requests, do reads synchronously since
- * it's not guaranteed that the server can handle out-of-order reads
- */
iomap_start_folio_read(folio, len);
- ret = fuse_do_readfolio(file, folio, off, len);
- iomap_finish_folio_read(folio, off, len, ret);
+ if (data->readahead) {
+ ret = fuse_handle_readahead(folio, data, pos, len);
+ /*
+ * If fuse_handle_readahead was successful, fuse_readpages_end
+ * will do the iomap_finish_folio_read, else we need to call it
+ * here
+ */
+ if (ret)
+ iomap_finish_folio_read(folio, off, len, ret);
+ } else {
+ /*
+ * for non-readahead read requests, do reads synchronously since
+ * it's not guaranteed that the server can handle out-of-order reads
+ */
+ ret = fuse_do_readfolio(file, folio, off, len);
+ iomap_finish_folio_read(folio, off, len, ret);
+ }
return ret;
}
@@ -923,7 +999,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
}
for (i = 0; i < ap->num_folios; i++) {
- folio_end_read(ap->folios[i], !err);
+ iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset,
+ ap->descs[i].length, err);
folio_put(ap->folios[i]);
}
if (ia->ff)
@@ -933,7 +1010,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
}
static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
- unsigned int count)
+ unsigned int count, bool async)
{
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
@@ -955,7 +1032,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
ia->read.attr_ver = fuse_get_attr_version(fm->fc);
- if (fm->fc->async_read) {
+ if (async) {
ia->ff = fuse_file_get(ff);
ap->args.end = fuse_readpages_end;
err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
@@ -972,81 +1049,20 @@ static void fuse_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
- unsigned int max_pages, nr_pages;
- struct folio *folio = NULL;
+ struct fuse_fill_read_data data = {
+ .file = rac->file,
+ .readahead = true,
+ .fc = fc,
+ .rac = rac,
+ };
if (fuse_is_bad(inode))
return;
- max_pages = min_t(unsigned int, fc->max_pages,
- fc->max_read / PAGE_SIZE);
-
- /*
- * This is only accurate the first time through, since readahead_folio()
- * doesn't update readahead_count() from the previous folio until the
- * next call. Grab nr_pages here so we know how many pages we're going
- * to have to process. This means that we will exit here with
- * readahead_count() == folio_nr_pages(last_folio), but we will have
- * consumed all of the folios, and read_pages() will call
- * readahead_folio() again which will clean up the rac.
- */
- nr_pages = readahead_count(rac);
-
- while (nr_pages) {
- struct fuse_io_args *ia;
- struct fuse_args_pages *ap;
- unsigned cur_pages = min(max_pages, nr_pages);
- unsigned int pages = 0;
-
- if (fc->num_background >= fc->congestion_threshold &&
- rac->ra->async_size >= readahead_count(rac))
- /*
- * Congested and only async pages left, so skip the
- * rest.
- */
- break;
-
- ia = fuse_io_alloc(NULL, cur_pages);
- if (!ia)
- break;
- ap = &ia->ap;
-
- while (pages < cur_pages) {
- unsigned int folio_pages;
-
- /*
- * This returns a folio with a ref held on it.
- * The ref needs to be held until the request is
- * completed, since the splice case (see
- * fuse_try_move_page()) drops the ref after it's
- * replaced in the page cache.
- */
- if (!folio)
- folio = __readahead_folio(rac);
-
- folio_pages = folio_nr_pages(folio);
- if (folio_pages > cur_pages - pages) {
- /*
- * Large folios belonging to fuse will never
- * have more pages than max_pages.
- */
- WARN_ON(!pages);
- break;
- }
-
- ap->folios[ap->num_folios] = folio;
- ap->descs[ap->num_folios].length = folio_size(folio);
- ap->num_folios++;
- pages += folio_pages;
- folio = NULL;
- }
- fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT);
- nr_pages -= pages;
- }
- if (folio) {
- folio_end_read(folio, false);
- folio_put(folio);
- }
+ iomap_readahead(rac, &fuse_iomap_ops, &fuse_iomap_read_ops, &data);
+ if (data.ia)
+ fuse_send_readpages(data.ia, data.file, data.nr_bytes,
+ fc->async_read);
}
static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -2077,7 +2093,7 @@ struct fuse_fill_wb_data {
struct fuse_file *ff;
unsigned int max_folios;
/*
- * nr_bytes won't overflow since fuse_writepage_need_send() caps
+ * nr_bytes won't overflow since fuse_folios_need_send() caps
* wb requests to never exceed fc->max_pages (which has an upper bound
* of U16_MAX).
*/
@@ -2122,14 +2138,15 @@ static void fuse_writepages_send(struct inode *inode,
spin_unlock(&fi->lock);
}
-static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
- unsigned len, struct fuse_args_pages *ap,
- struct fuse_fill_wb_data *data)
+static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
+ unsigned len, struct fuse_args_pages *ap,
+ unsigned cur_bytes, bool write)
{
struct folio *prev_folio;
struct fuse_folio_desc prev_desc;
- unsigned bytes = data->nr_bytes + len;
+ unsigned bytes = cur_bytes + len;
loff_t prev_pos;
+ size_t max_bytes = write ? fc->max_write : fc->max_read;
WARN_ON(!ap->num_folios);
@@ -2137,8 +2154,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
return true;
- /* Reached max write bytes */
- if (bytes > fc->max_write)
+ if (bytes > max_bytes)
return true;
/* Discontinuity */
@@ -2148,11 +2164,6 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
if (prev_pos != pos)
return true;
- /* Need to grow the pages array? If so, did the expansion fail? */
- if (ap->num_folios == data->max_folios &&
- !fuse_pages_realloc(data, fc->max_pages))
- return true;
-
return false;
}
@@ -2176,10 +2187,21 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
return -EIO;
}
- if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) {
- fuse_writepages_send(inode, data);
- data->wpa = NULL;
- data->nr_bytes = 0;
+ if (wpa) {
+ bool send = fuse_folios_need_send(fc, pos, len, ap, data->nr_bytes,
+ true);
+
+ if (!send) {
+ /* Need to grow the pages array? If so, did the expansion fail? */
+ send = (ap->num_folios == data->max_folios) &&
+ !fuse_pages_realloc(data, fc->max_pages);
+ }
+
+ if (send) {
+ fuse_writepages_send(inode, data);
+ data->wpa = NULL;
+ data->nr_bytes = 0;
+ }
}
if (data->wpa == NULL) {
--
2.47.3
next prev parent reply other threads:[~2025-08-29 23:58 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-29 23:56 [PATCH v1 00/16] fuse: use iomap for buffered reads + readahead Joanne Koong
2025-08-29 23:56 ` [PATCH v1 01/16] iomap: move async bio read logic into helper function Joanne Koong
2025-09-03 20:16 ` Darrick J. Wong
2025-09-04 6:00 ` Christoph Hellwig
2025-09-04 21:44 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 02/16] iomap: rename cur_folio_in_bio to folio_unlocked Joanne Koong
2025-09-03 20:26 ` [PATCH v1 02/16] iomap: rename cur_folio_in_bio to folio_unlocked Darrick J. Wong
2025-09-04 6:03 ` Christoph Hellwig
2025-09-04 22:06 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 03/16] iomap: refactor read/readahead completion Joanne Koong
2025-09-04 6:05 ` Christoph Hellwig
2025-09-04 23:16 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 04/16] iomap: use iomap_iter->private for stashing read/readahead bio Joanne Koong
2025-09-03 20:30 ` Darrick J. Wong
2025-09-04 6:07 ` Christoph Hellwig
2025-09-04 22:20 ` Joanne Koong
2025-09-04 23:15 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 05/16] iomap: propagate iomap_read_folio() error to caller Joanne Koong
2025-09-03 20:32 ` Darrick J. Wong
2025-09-04 6:09 ` Christoph Hellwig
2025-09-04 21:13 ` Matthew Wilcox
2025-08-29 23:56 ` [PATCH v1 06/16] iomap: move read/readahead logic out of CONFIG_BLOCK guard Joanne Koong
2025-08-29 23:56 ` [PATCH v1 07/16] iomap: iterate through entire folio in iomap_readpage_iter() Joanne Koong
2025-09-03 20:43 ` Darrick J. Wong
2025-09-04 22:37 ` Joanne Koong
2025-09-04 6:14 ` Christoph Hellwig
2025-09-04 22:45 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 08/16] iomap: rename iomap_readpage_iter() to iomap_readfolio_iter() Joanne Koong
2025-09-04 6:15 ` Christoph Hellwig
2025-09-04 22:47 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 09/16] iomap: rename iomap_readpage_ctx struct to iomap_readfolio_ctx Joanne Koong
2025-09-03 20:44 ` Darrick J. Wong
2025-08-29 23:56 ` [PATCH v1 10/16] iomap: add iomap_start_folio_read() helper Joanne Koong
2025-09-03 20:52 ` Darrick J. Wong
2025-08-29 23:56 ` [PATCH v1 11/16] iomap: make start folio read and finish folio read public APIs Joanne Koong
2025-09-03 20:53 ` Darrick J. Wong
2025-09-04 6:15 ` Christoph Hellwig
2025-08-29 23:56 ` [PATCH v1 12/16] iomap: add iomap_read_ops for read and readahead Joanne Koong
2025-09-03 21:08 ` Darrick J. Wong
2025-09-04 20:58 ` Joanne Koong
2025-09-04 6:21 ` Christoph Hellwig
2025-09-04 21:38 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 13/16] iomap: add a private arg " Joanne Koong
2025-08-30 1:54 ` Gao Xiang
2025-09-02 21:24 ` Joanne Koong
2025-09-03 1:55 ` Gao Xiang
2025-09-04 23:29 ` Joanne Koong
2025-09-03 21:11 ` Darrick J. Wong
2025-08-29 23:56 ` [PATCH v1 14/16] fuse: use iomap for read_folio Joanne Koong
2025-09-03 21:13 ` Darrick J. Wong
2025-08-29 23:56 ` Joanne Koong [this message]
2025-09-03 21:17 ` [PATCH v1 15/16] fuse: use iomap for readahead Darrick J. Wong
2025-09-04 19:40 ` Joanne Koong
2025-09-04 19:46 ` Joanne Koong
2025-08-29 23:56 ` [PATCH v1 16/16] fuse: remove fuse_readpages_end() null mapping check Joanne Koong
2025-09-02 9:21 ` Miklos Szeredi
2025-09-02 21:19 ` Joanne Koong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250829235627.4053234-16-joannelkoong@gmail.com \
--to=joannelkoong@gmail.com \
--cc=brauner@kernel.org \
--cc=djwong@kernel.org \
--cc=hch@infradead.org \
--cc=kernel-team@meta.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
--cc=miklos@szeredi.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).