All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: Christoph Hellwig <hch@lst.de>
Cc: Christian Brauner <brauner@kernel.org>,
	"Darrick J. Wong" <djwong@kernel.org>,
	Joanne Koong <joannelkoong@gmail.com>,
	linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-block@vger.kernel.org,
	gfs2@lists.linux.dev
Subject: Re: [PATCH 06/12] iomap: move all ioend handling to ioend.c
Date: Fri, 27 Jun 2025 11:15:14 -0400	[thread overview]
Message-ID: <aF61grplkxy7RXie@bfoster> (raw)
In-Reply-To: <20250627070328.975394-7-hch@lst.de>

On Fri, Jun 27, 2025 at 09:02:39AM +0200, Christoph Hellwig wrote:
> Now that the writeback code has the proper abstractions, all the ioend
> code can be self-contained in ioend.c.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  fs/iomap/buffered-io.c | 215 ----------------------------------------
>  fs/iomap/internal.h    |   1 -
>  fs/iomap/ioend.c       | 220 ++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 219 insertions(+), 217 deletions(-)
> 
...
> diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c
> index 18894ebba6db..ce0a4c13d008 100644
> --- a/fs/iomap/ioend.c
> +++ b/fs/iomap/ioend.c
> @@ -1,10 +1,13 @@
>  // SPDX-License-Identifier: GPL-2.0
>  /*
> - * Copyright (c) 2024-2025 Christoph Hellwig.
> + * Copyright (c) 2016-2025 Christoph Hellwig.
>   */
>  #include <linux/iomap.h>
>  #include <linux/list_sort.h>
> +#include <linux/pagemap.h>
> +#include <linux/writeback.h>
>  #include "internal.h"
> +#include "trace.h"

Can any of these now be dropped from buffered-io.c?

Otherwise LGTM:

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  
>  struct bio_set iomap_ioend_bioset;
>  EXPORT_SYMBOL_GPL(iomap_ioend_bioset);
> @@ -28,6 +31,221 @@ struct iomap_ioend *iomap_init_ioend(struct inode *inode,
>  }
>  EXPORT_SYMBOL_GPL(iomap_init_ioend);
>  
> +/*
> + * We're now finished for good with this ioend structure.  Update the folio
> + * state, release holds on bios, and finally free up memory.  Do not use the
> + * ioend after this.
> + */
> +static u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
> +{
> +	struct inode *inode = ioend->io_inode;
> +	struct bio *bio = &ioend->io_bio;
> +	struct folio_iter fi;
> +	u32 folio_count = 0;
> +
> +	if (ioend->io_error) {
> +		mapping_set_error(inode->i_mapping, ioend->io_error);
> +		if (!bio_flagged(bio, BIO_QUIET)) {
> +			pr_err_ratelimited(
> +"%s: writeback error on inode %lu, offset %lld, sector %llu",
> +				inode->i_sb->s_id, inode->i_ino,
> +				ioend->io_offset, ioend->io_sector);
> +		}
> +	}
> +
> +	/* walk all folios in bio, ending page IO on them */
> +	bio_for_each_folio_all(fi, bio) {
> +		iomap_finish_folio_write(inode, fi.folio, fi.length);
> +		folio_count++;
> +	}
> +
> +	bio_put(bio);	/* frees the ioend */
> +	return folio_count;
> +}
> +
> +static void ioend_writeback_end_bio(struct bio *bio)
> +{
> +	struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);
> +
> +	ioend->io_error = blk_status_to_errno(bio->bi_status);
> +	iomap_finish_ioend_buffered(ioend);
> +}
> +
> +/*
> + * We cannot cancel the ioend directly in case of an error, so call the bio end
> + * I/O handler with the error status here to run the normal I/O completion
> + * handler.
> + */
> +int ioend_writeback_submit(struct iomap_writeback_ctx *wpc, int error)
> +{
> +	struct iomap_ioend *ioend = wpc->wb_ctx;
> +
> +	if (!ioend->io_bio.bi_end_io)
> +		ioend->io_bio.bi_end_io = ioend_writeback_end_bio;
> +
> +	if (WARN_ON_ONCE(wpc->iomap.flags & IOMAP_F_ANON_WRITE))
> +		error = -EIO;
> +
> +	if (error) {
> +		ioend->io_bio.bi_status = errno_to_blk_status(error);
> +		bio_endio(&ioend->io_bio);
> +		return error;
> +	}
> +
> +	submit_bio(&ioend->io_bio);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(ioend_writeback_submit);
> +
> +static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writeback_ctx *wpc,
> +		loff_t pos, u16 ioend_flags)
> +{
> +	struct bio *bio;
> +
> +	bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS,
> +			       REQ_OP_WRITE | wbc_to_write_flags(wpc->wbc),
> +			       GFP_NOFS, &iomap_ioend_bioset);
> +	bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos);
> +	bio->bi_write_hint = wpc->inode->i_write_hint;
> +	wbc_init_bio(wpc->wbc, bio);
> +	wpc->nr_folios = 0;
> +	return iomap_init_ioend(wpc->inode, bio, pos, ioend_flags);
> +}
> +
> +static bool iomap_can_add_to_ioend(struct iomap_writeback_ctx *wpc, loff_t pos,
> +		u16 ioend_flags)
> +{
> +	struct iomap_ioend *ioend = wpc->wb_ctx;
> +
> +	if (ioend_flags & IOMAP_IOEND_BOUNDARY)
> +		return false;
> +	if ((ioend_flags & IOMAP_IOEND_NOMERGE_FLAGS) !=
> +	    (ioend->io_flags & IOMAP_IOEND_NOMERGE_FLAGS))
> +		return false;
> +	if (pos != ioend->io_offset + ioend->io_size)
> +		return false;
> +	if (!(wpc->iomap.flags & IOMAP_F_ANON_WRITE) &&
> +	    iomap_sector(&wpc->iomap, pos) != bio_end_sector(&ioend->io_bio))
> +		return false;
> +	/*
> +	 * Limit ioend bio chain lengths to minimise IO completion latency. This
> +	 * also prevents long tight loops ending page writeback on all the
> +	 * folios in the ioend.
> +	 */
> +	if (wpc->nr_folios >= IOEND_BATCH_SIZE)
> +		return false;
> +	return true;
> +}
> +
> +/*
> + * Test to see if we have an existing ioend structure that we could append to
> + * first; otherwise finish off the current ioend and start another.
> + *
> + * If a new ioend is created and cached, the old ioend is submitted to the block
> + * layer instantly.  Batching optimisations are provided by higher level block
> + * plugging.
> + *
> + * At the end of a writeback pass, there will be a cached ioend remaining on the
> + * writepage context that the caller will need to submit.
> + */
> +ssize_t iomap_add_to_ioend(struct iomap_writeback_ctx *wpc, struct folio *folio,
> +		loff_t pos, loff_t end_pos, unsigned int dirty_len)
> +{
> +	struct iomap_ioend *ioend = wpc->wb_ctx;
> +	size_t poff = offset_in_folio(folio, pos);
> +	unsigned int ioend_flags = 0;
> +	unsigned int map_len = min_t(u64, dirty_len,
> +		wpc->iomap.offset + wpc->iomap.length - pos);
> +	int error;
> +
> +	trace_iomap_add_to_ioend(wpc->inode, pos, dirty_len, &wpc->iomap);
> +
> +	WARN_ON_ONCE(!folio->private && map_len < dirty_len);
> +
> +	switch (wpc->iomap.type) {
> +	case IOMAP_INLINE:
> +		WARN_ON_ONCE(1);
> +		return -EIO;
> +	case IOMAP_HOLE:
> +		return map_len;
> +	default:
> +		break;
> +	}
> +
> +	if (wpc->iomap.type == IOMAP_UNWRITTEN)
> +		ioend_flags |= IOMAP_IOEND_UNWRITTEN;
> +	if (wpc->iomap.flags & IOMAP_F_SHARED)
> +		ioend_flags |= IOMAP_IOEND_SHARED;
> +	if (folio_test_dropbehind(folio))
> +		ioend_flags |= IOMAP_IOEND_DONTCACHE;
> +	if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
> +		ioend_flags |= IOMAP_IOEND_BOUNDARY;
> +
> +	if (!ioend || !iomap_can_add_to_ioend(wpc, pos, ioend_flags)) {
> +new_ioend:
> +		if (ioend) {
> +			error = wpc->ops->writeback_submit(wpc, 0);
> +			if (error)
> +				return error;
> +		}
> +		wpc->wb_ctx = ioend = iomap_alloc_ioend(wpc, pos, ioend_flags);
> +	}
> +
> +	if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff))
> +		goto new_ioend;
> +
> +	iomap_start_folio_write(wpc->inode, folio, map_len);
> +
> +	/*
> +	 * Clamp io_offset and io_size to the incore EOF so that ondisk
> +	 * file size updates in the ioend completion are byte-accurate.
> +	 * This avoids recovering files with zeroed tail regions when
> +	 * writeback races with appending writes:
> +	 *
> +	 *    Thread 1:                  Thread 2:
> +	 *    ------------               -----------
> +	 *    write [A, A+B]
> +	 *    update inode size to A+B
> +	 *    submit I/O [A, A+BS]
> +	 *                               write [A+B, A+B+C]
> +	 *                               update inode size to A+B+C
> +	 *    <I/O completes, updates disk size to min(A+B+C, A+BS)>
> +	 *    <power failure>
> +	 *
> +	 *  After reboot:
> +	 *    1) with A+B+C < A+BS, the file has zero padding in range
> +	 *       [A+B, A+B+C]
> +	 *
> +	 *    |<     Block Size (BS)   >|
> +	 *    |DDDDDDDDDDDD0000000000000|
> +	 *    ^           ^        ^
> +	 *    A          A+B     A+B+C
> +	 *                       (EOF)
> +	 *
> +	 *    2) with A+B+C > A+BS, the file has zero padding in range
> +	 *       [A+B, A+BS]
> +	 *
> +	 *    |<     Block Size (BS)   >|<     Block Size (BS)    >|
> +	 *    |DDDDDDDDDDDD0000000000000|00000000000000000000000000|
> +	 *    ^           ^             ^           ^
> +	 *    A          A+B           A+BS       A+B+C
> +	 *                             (EOF)
> +	 *
> +	 *    D = Valid Data
> +	 *    0 = Zero Padding
> +	 *
> +	 * Note that this defeats the ability to chain the ioends of
> +	 * appending writes.
> +	 */
> +	ioend->io_size += map_len;
> +	if (ioend->io_offset + ioend->io_size > end_pos)
> +		ioend->io_size = end_pos - ioend->io_offset;
> +
> +	wbc_account_cgroup_owner(wpc->wbc, folio, map_len);
> +	return map_len;
> +}
> +EXPORT_SYMBOL_GPL(iomap_add_to_ioend);
> +
>  static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
>  {
>  	if (ioend->io_parent) {
> -- 
> 2.47.2
> 
> 


  reply	other threads:[~2025-06-27 15:11 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-27  7:02 refactor the iomap writeback code v3 Christoph Hellwig
2025-06-27  7:02 ` [PATCH 01/12] iomap: pass more arguments using the iomap writeback context Christoph Hellwig
2025-06-27 15:12   ` Brian Foster
2025-06-30  5:44     ` Christoph Hellwig
2025-06-30 12:41       ` Brian Foster
2025-07-02 18:18         ` Darrick J. Wong
2025-07-02 22:00           ` Joanne Koong
2025-07-02 22:23             ` Darrick J. Wong
2025-07-02 18:22   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 02/12] iomap: cleanup the pending writeback tracking in iomap_writepage_map_blocks Christoph Hellwig
2025-06-27 15:12   ` Brian Foster
2025-07-02 18:23   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 03/12] iomap: refactor the writeback interface Christoph Hellwig
2025-06-27  8:23   ` Damien Le Moal
2025-06-27 15:14   ` Brian Foster
2025-06-30  5:42     ` Christoph Hellwig
2025-06-30 12:39       ` Brian Foster
2025-07-02 18:24   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 04/12] iomap: hide ioends from the generic writeback code Christoph Hellwig
2025-06-27  8:26   ` Damien Le Moal
2025-06-27 15:14   ` Brian Foster
2025-06-28  3:09   ` Randy Dunlap
2025-07-02 18:25   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 05/12] iomap: add public helpers for uptodate state manipulation Christoph Hellwig
2025-06-27 15:14   ` Brian Foster
2025-07-02 18:25   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 06/12] iomap: move all ioend handling to ioend.c Christoph Hellwig
2025-06-27 15:15   ` Brian Foster [this message]
2025-06-30  5:44     ` Christoph Hellwig
2025-07-02 18:26   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 07/12] iomap: rename iomap_writepage_map to iomap_writeback_folio Christoph Hellwig
2025-06-27 16:38   ` Brian Foster
2025-07-02 18:26   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 08/12] iomap: move folio_unlock out of iomap_writeback_folio Christoph Hellwig
2025-06-27 16:38   ` Brian Foster
2025-06-30  5:45     ` Christoph Hellwig
2025-06-30 12:39       ` Brian Foster
2025-06-27  7:02 ` [PATCH 09/12] iomap: export iomap_writeback_folio Christoph Hellwig
2025-07-02 18:27   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 10/12] iomap: replace iomap_folio_ops with iomap_write_ops Christoph Hellwig
2025-06-27  8:29   ` Damien Le Moal
2025-06-27 19:18   ` Brian Foster
2025-06-30  5:43     ` Christoph Hellwig
2025-07-02 18:28   ` Darrick J. Wong
2025-06-27  7:02 ` [PATCH 11/12] iomap: add read_folio_range() handler for buffered writes Christoph Hellwig
2025-06-27 19:18   ` Brian Foster
2025-06-30  5:47     ` Christoph Hellwig
2025-06-27  7:02 ` [PATCH 12/12] iomap: build the writeback code without CONFIG_BLOCK Christoph Hellwig
2025-07-02 18:20   ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aF61grplkxy7RXie@bfoster \
    --to=bfoster@redhat.com \
    --cc=brauner@kernel.org \
    --cc=djwong@kernel.org \
    --cc=gfs2@lists.linux.dev \
    --cc=hch@lst.de \
    --cc=joannelkoong@gmail.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.