linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Joanne Koong <joannelkoong@gmail.com>
Cc: brauner@kernel.org, miklos@szeredi.hu, hch@infradead.org,
	hsiangkao@linux.alibaba.com, linux-block@vger.kernel.org,
	gfs2@lists.linux.dev, linux-fsdevel@vger.kernel.org,
	kernel-team@meta.com, linux-xfs@vger.kernel.org,
	linux-doc@vger.kernel.org
Subject: Re: [PATCH v3 14/15] fuse: use iomap for readahead
Date: Thu, 18 Sep 2025 15:35:26 -0700	[thread overview]
Message-ID: <20250918223526.GA1587915@frogsfrogsfrogs> (raw)
In-Reply-To: <20250916234425.1274735-15-joannelkoong@gmail.com>

On Tue, Sep 16, 2025 at 04:44:24PM -0700, Joanne Koong wrote:
> Do readahead in fuse using iomap. This gives us granular uptodate
> tracking for large folios, which optimizes how much data needs to be
> read in. If some portions of the folio are already uptodate (e.g. through
> a prior write), we only need to read in the non-uptodate portions.
> 
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>

Looks generally ok to me,
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>

--D

> ---
>  fs/fuse/file.c | 220 ++++++++++++++++++++++++++++---------------------
>  1 file changed, 124 insertions(+), 96 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 4f27a3b0c20a..db0b1f20fee4 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -844,8 +844,65 @@ static const struct iomap_ops fuse_iomap_ops = {
>  
>  struct fuse_fill_read_data {
>  	struct file *file;
> +
> +	/* Fields below are used if sending the read request asynchronously */
> +	struct fuse_conn *fc;
> +	struct fuse_io_args *ia;
> +	unsigned int nr_bytes;
>  };
>  
> +/* forward declarations */
> +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
> +				  unsigned len, struct fuse_args_pages *ap,
> +				  unsigned cur_bytes, bool write);
> +static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
> +				unsigned int count, bool async);
> +
> +static int fuse_handle_readahead(struct folio *folio,
> +				 struct readahead_control *rac,
> +				 struct fuse_fill_read_data *data, loff_t pos,
> +				 size_t len)
> +{
> +	struct fuse_io_args *ia = data->ia;
> +	size_t off = offset_in_folio(folio, pos);
> +	struct fuse_conn *fc = data->fc;
> +	struct fuse_args_pages *ap;
> +	unsigned int nr_pages;
> +
> +	if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes,
> +					false)) {
> +		fuse_send_readpages(ia, data->file, data->nr_bytes,
> +				    fc->async_read);
> +		data->nr_bytes = 0;
> +		data->ia = NULL;
> +		ia = NULL;
> +	}
> +	if (!ia) {
> +		if (fc->num_background >= fc->congestion_threshold &&
> +		    rac->ra->async_size >= readahead_count(rac))
> +			/*
> +			 * Congested and only async pages left, so skip the
> +			 * rest.
> +			 */
> +			return -EAGAIN;
> +
> +		nr_pages = min(fc->max_pages, readahead_count(rac));
> +		data->ia = fuse_io_alloc(NULL, nr_pages);
> +		if (!data->ia)
> +			return -ENOMEM;
> +		ia = data->ia;
> +	}
> +	folio_get(folio);
> +	ap = &ia->ap;
> +	ap->folios[ap->num_folios] = folio;
> +	ap->descs[ap->num_folios].offset = off;
> +	ap->descs[ap->num_folios].length = len;
> +	data->nr_bytes += len;
> +	ap->num_folios++;
> +
> +	return 0;
> +}
> +
>  static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
>  					     struct iomap_read_folio_ctx *ctx,
>  					     size_t len)
> @@ -857,18 +914,40 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
>  	struct file *file = data->file;
>  	int ret;
>  
> -	/*
> -	 *  for non-readahead read requests, do reads synchronously since
> -	 *  it's not guaranteed that the server can handle out-of-order reads
> -	 */
>  	iomap_start_folio_read(folio, len);
> -	ret = fuse_do_readfolio(file, folio, off, len);
> -	iomap_finish_folio_read(folio, off, len, ret);
> +	if (ctx->rac) {
> +		ret = fuse_handle_readahead(folio, ctx->rac, data, pos, len);
> +		/*
> +		 * If fuse_handle_readahead was successful, fuse_readpages_end
> +		 * will do the iomap_finish_folio_read, else we need to call it
> +		 * here
> +		 */
> +		if (ret)
> +			iomap_finish_folio_read(folio, off, len, ret);
> +	} else {
> +		/*
> +		 *  for non-readahead read requests, do reads synchronously
> +		 *  since it's not guaranteed that the server can handle
> +		 *  out-of-order reads
> +		 */
> +		ret = fuse_do_readfolio(file, folio, off, len);
> +		iomap_finish_folio_read(folio, off, len, ret);
> +	}
>  	return ret;
>  }
>  
> +static void fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx)
> +{
> +	struct fuse_fill_read_data *data = ctx->read_ctx;
> +
> +	if (data->ia)
> +		fuse_send_readpages(data->ia, data->file, data->nr_bytes,
> +				    data->fc->async_read);
> +}
> +
>  static const struct iomap_read_ops fuse_iomap_read_ops = {
>  	.read_folio_range = fuse_iomap_read_folio_range_async,
> +	.submit_read = fuse_iomap_read_submit,
>  };
>  
>  static int fuse_read_folio(struct file *file, struct folio *folio)
> @@ -930,7 +1009,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
>  	}
>  
>  	for (i = 0; i < ap->num_folios; i++) {
> -		folio_end_read(ap->folios[i], !err);
> +		iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset,
> +					ap->descs[i].length, err);
>  		folio_put(ap->folios[i]);
>  	}
>  	if (ia->ff)
> @@ -940,7 +1020,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
>  }
>  
>  static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
> -				unsigned int count)
> +				unsigned int count, bool async)
>  {
>  	struct fuse_file *ff = file->private_data;
>  	struct fuse_mount *fm = ff->fm;
> @@ -962,7 +1042,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
>  
>  	fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
>  	ia->read.attr_ver = fuse_get_attr_version(fm->fc);
> -	if (fm->fc->async_read) {
> +	if (async) {
>  		ia->ff = fuse_file_get(ff);
>  		ap->args.end = fuse_readpages_end;
>  		err = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
> @@ -979,81 +1059,20 @@ static void fuse_readahead(struct readahead_control *rac)
>  {
>  	struct inode *inode = rac->mapping->host;
>  	struct fuse_conn *fc = get_fuse_conn(inode);
> -	unsigned int max_pages, nr_pages;
> -	struct folio *folio = NULL;
> +	struct fuse_fill_read_data data = {
> +		.file = rac->file,
> +		.fc = fc,
> +	};
> +	struct iomap_read_folio_ctx ctx = {
> +		.ops = &fuse_iomap_read_ops,
> +		.rac = rac,
> +		.read_ctx = &data
> +	};
>  
>  	if (fuse_is_bad(inode))
>  		return;
>  
> -	max_pages = min_t(unsigned int, fc->max_pages,
> -			fc->max_read / PAGE_SIZE);
> -
> -	/*
> -	 * This is only accurate the first time through, since readahead_folio()
> -	 * doesn't update readahead_count() from the previous folio until the
> -	 * next call.  Grab nr_pages here so we know how many pages we're going
> -	 * to have to process.  This means that we will exit here with
> -	 * readahead_count() == folio_nr_pages(last_folio), but we will have
> -	 * consumed all of the folios, and read_pages() will call
> -	 * readahead_folio() again which will clean up the rac.
> -	 */
> -	nr_pages = readahead_count(rac);
> -
> -	while (nr_pages) {
> -		struct fuse_io_args *ia;
> -		struct fuse_args_pages *ap;
> -		unsigned cur_pages = min(max_pages, nr_pages);
> -		unsigned int pages = 0;
> -
> -		if (fc->num_background >= fc->congestion_threshold &&
> -		    rac->ra->async_size >= readahead_count(rac))
> -			/*
> -			 * Congested and only async pages left, so skip the
> -			 * rest.
> -			 */
> -			break;
> -
> -		ia = fuse_io_alloc(NULL, cur_pages);
> -		if (!ia)
> -			break;
> -		ap = &ia->ap;
> -
> -		while (pages < cur_pages) {
> -			unsigned int folio_pages;
> -
> -			/*
> -			 * This returns a folio with a ref held on it.
> -			 * The ref needs to be held until the request is
> -			 * completed, since the splice case (see
> -			 * fuse_try_move_page()) drops the ref after it's
> -			 * replaced in the page cache.
> -			 */
> -			if (!folio)
> -				folio =  __readahead_folio(rac);
> -
> -			folio_pages = folio_nr_pages(folio);
> -			if (folio_pages > cur_pages - pages) {
> -				/*
> -				 * Large folios belonging to fuse will never
> -				 * have more pages than max_pages.
> -				 */
> -				WARN_ON(!pages);
> -				break;
> -			}
> -
> -			ap->folios[ap->num_folios] = folio;
> -			ap->descs[ap->num_folios].length = folio_size(folio);
> -			ap->num_folios++;
> -			pages += folio_pages;
> -			folio = NULL;
> -		}
> -		fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT);
> -		nr_pages -= pages;
> -	}
> -	if (folio) {
> -		folio_end_read(folio, false);
> -		folio_put(folio);
> -	}
> +	iomap_readahead(&fuse_iomap_ops, &ctx);
>  }
>  
>  static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
> @@ -2084,7 +2103,7 @@ struct fuse_fill_wb_data {
>  	struct fuse_file *ff;
>  	unsigned int max_folios;
>  	/*
> -	 * nr_bytes won't overflow since fuse_writepage_need_send() caps
> +	 * nr_bytes won't overflow since fuse_folios_need_send() caps
>  	 * wb requests to never exceed fc->max_pages (which has an upper bound
>  	 * of U16_MAX).
>  	 */
> @@ -2129,14 +2148,15 @@ static void fuse_writepages_send(struct inode *inode,
>  	spin_unlock(&fi->lock);
>  }
>  
> -static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
> -				     unsigned len, struct fuse_args_pages *ap,
> -				     struct fuse_fill_wb_data *data)
> +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos,
> +				  unsigned len, struct fuse_args_pages *ap,
> +				  unsigned cur_bytes, bool write)
>  {
>  	struct folio *prev_folio;
>  	struct fuse_folio_desc prev_desc;
> -	unsigned bytes = data->nr_bytes + len;
> +	unsigned bytes = cur_bytes + len;
>  	loff_t prev_pos;
> +	size_t max_bytes = write ? fc->max_write : fc->max_read;
>  
>  	WARN_ON(!ap->num_folios);
>  
> @@ -2144,8 +2164,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
>  	if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages)
>  		return true;
>  
> -	/* Reached max write bytes */
> -	if (bytes > fc->max_write)
> +	if (bytes > max_bytes)
>  		return true;
>  
>  	/* Discontinuity */
> @@ -2155,11 +2174,6 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos,
>  	if (prev_pos != pos)
>  		return true;
>  
> -	/* Need to grow the pages array?  If so, did the expansion fail? */
> -	if (ap->num_folios == data->max_folios &&
> -	    !fuse_pages_realloc(data, fc->max_pages))
> -		return true;
> -
>  	return false;
>  }
>  
> @@ -2183,10 +2197,24 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
>  			return -EIO;
>  	}
>  
> -	if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) {
> -		fuse_writepages_send(inode, data);
> -		data->wpa = NULL;
> -		data->nr_bytes = 0;
> +	if (wpa) {
> +		bool send = fuse_folios_need_send(fc, pos, len, ap,
> +						  data->nr_bytes, true);
> +
> +		if (!send) {
> +			/*
> +			 * Need to grow the pages array?  If so, did the
> +			 * expansion fail?
> +			 */
> +			send = (ap->num_folios == data->max_folios) &&
> +				!fuse_pages_realloc(data, fc->max_pages);
> +		}
> +
> +		if (send) {
> +			fuse_writepages_send(inode, data);
> +			data->wpa = NULL;
> +			data->nr_bytes = 0;
> +		}
>  	}
>  
>  	if (data->wpa == NULL) {
> -- 
> 2.47.3
> 
> 

  reply	other threads:[~2025-09-18 22:35 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-16 23:44 [PATCH v3 00/15] fuse: use iomap for buffered reads + readahead Joanne Koong
2025-09-16 23:44 ` [PATCH v3 01/15] iomap: move bio read logic into helper function Joanne Koong
2025-09-18 21:27   ` Darrick J. Wong
2025-09-16 23:44 ` [PATCH v3 02/15] iomap: move read/readahead bio submission " Joanne Koong
2025-09-18 21:27   ` Darrick J. Wong
2025-09-16 23:44 ` [PATCH v3 03/15] iomap: store read/readahead bio generically Joanne Koong
2025-09-18 21:29   ` Darrick J. Wong
2025-09-16 23:44 ` [PATCH v3 04/15] iomap: iterate over entire folio in iomap_readpage_iter() Joanne Koong
2025-09-18 21:37   ` Darrick J. Wong
2025-09-22 22:33   ` Joanne Koong
2025-09-16 23:44 ` [PATCH v3 05/15] iomap: rename iomap_readpage_iter() to iomap_read_folio_iter() Joanne Koong
2025-09-16 23:44 ` [PATCH v3 06/15] iomap: rename iomap_readpage_ctx struct to iomap_read_folio_ctx Joanne Koong
2025-09-16 23:44 ` [PATCH v3 07/15] iomap: track read/readahead folio ownership internally Joanne Koong
2025-09-18 21:49   ` Darrick J. Wong
2025-09-19 18:14     ` Joanne Koong
2025-09-16 23:44 ` [PATCH v3 08/15] iomap: add public start/finish folio read helpers Joanne Koong
2025-09-16 23:44 ` [PATCH v3 09/15] iomap: add caller-provided callbacks for read and readahead Joanne Koong
2025-09-16 23:44 ` [PATCH v3 10/15] iomap: add bias for async read requests Joanne Koong
2025-09-18 22:30   ` Darrick J. Wong
2025-09-19 18:34     ` Joanne Koong
2025-09-22 18:33     ` Christoph Hellwig
2025-09-22 20:54       ` Matthew Wilcox
2025-09-24 22:56         ` Joanne Koong
2025-09-22 23:19   ` Joanne Koong
2025-09-16 23:44 ` [PATCH v3 11/15] iomap: move buffered io bio logic into new file Joanne Koong
2025-09-17 21:40   ` kernel test robot
2025-09-18 22:31   ` Darrick J. Wong
2025-09-19 15:33     ` Christoph Hellwig
2025-09-19 15:32   ` Christoph Hellwig
2025-09-16 23:44 ` [PATCH v3 12/15] iomap: make iomap_read_folio() a void return Joanne Koong
2025-09-18 21:55   ` Darrick J. Wong
2025-09-16 23:44 ` [PATCH v3 13/15] fuse: use iomap for read_folio Joanne Koong
2025-09-16 23:44 ` [PATCH v3 14/15] fuse: use iomap for readahead Joanne Koong
2025-09-18 22:35   ` Darrick J. Wong [this message]
2025-09-16 23:44 ` [PATCH v3 15/15] fuse: remove fc->blkbits workaround for partial writes Joanne Koong
2025-09-18 22:35   ` Darrick J. Wong
2025-09-17  8:30 ` [syzbot ci] Re: fuse: use iomap for buffered reads + readahead syzbot ci
2025-09-17 19:59   ` Joanne Koong
2025-09-18 15:48     ` Aleksandr Nogikh
2025-09-18 21:15       ` Joanne Koong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250918223526.GA1587915@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=brauner@kernel.org \
    --cc=gfs2@lists.linux.dev \
    --cc=hch@infradead.org \
    --cc=hsiangkao@linux.alibaba.com \
    --cc=joannelkoong@gmail.com \
    --cc=kernel-team@meta.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for its NNTP newsgroup(s).