All of lore.kernel.org
 help / color / mirror / Atom feed
* don't build bios/contexts over multiple iomaps v2
@ 2026-06-23 13:51 Christoph Hellwig
  2026-06-23 13:51 ` [PATCH 1/2] iomap: consolidate bio submission Christoph Hellwig
  2026-06-23 13:51 ` [PATCH 2/2] iomap: submit read bio after each extent Christoph Hellwig
  0 siblings, 2 replies; 9+ messages in thread
From: Christoph Hellwig @ 2026-06-23 13:51 UTC (permalink / raw)
  To: Christian Brauner, Darrick J. Wong
  Cc: Kelu Ye, Yifan Zhao, Ritesh Harjani, Joanne Koong, Namjae Jeon,
	Sungjong Seo, Hyunchul Lee, Konstantin Komarov, Miklos Szeredi,
	fuse-devel, ntfs3, linux-erofs, linux-xfs, linux-fsdevel

Hi all,

this series changes how iomap submits bios for reads.  The old behavior
of building up bios across iomaps has been considered problematic for
a while, but we have now run into an erofs bug because of it, so it's
time to finally fix it.

It would be great to get the fix into 7.2 as the fixed bug can be
triggered by users.

Changes since v1:
 - don't submit fuse context after each iteration
 - consolidate some code to support the above
 - fix a bug in the fs PI support found while doing the above

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/2] iomap: consolidate bio submission
  2026-06-23 13:51 don't build bios/contexts over multiple iomaps v2 Christoph Hellwig
@ 2026-06-23 13:51 ` Christoph Hellwig
  2026-06-23 17:04   ` Joanne Koong
  2026-06-23 23:57   ` Namjae Jeon
  2026-06-23 13:51 ` [PATCH 2/2] iomap: submit read bio after each extent Christoph Hellwig
  1 sibling, 2 replies; 9+ messages in thread
From: Christoph Hellwig @ 2026-06-23 13:51 UTC (permalink / raw)
  To: Christian Brauner, Darrick J. Wong
  Cc: Kelu Ye, Yifan Zhao, Ritesh Harjani, Joanne Koong, Namjae Jeon,
	Sungjong Seo, Hyunchul Lee, Konstantin Komarov, Miklos Szeredi,
	fuse-devel, ntfs3, linux-erofs, linux-xfs, linux-fsdevel

Add an iomap_bio_submit_read_endio helper factored out of
iomap_bio_submit_read so that all ->submit_read implementations for
iomap_read_ops that use iomap_bio_read_folio_range can share the
logic.

Right now that logic is mostly trivial, but already has a bug for XFS
because the XFS version is too trivial:  file system integrity validation
needs a workqueue context and thus can't happen from the default iomap
bi_end_io I/O handler.  Unfortunately the iomap refactoring just before
fs integrity landed moved code around here and the call got misplaced,
meaning it never got called.  The PI information still is verified by
the block layer, but the offloading is less efficient (and the future
userspace interface can't get at it).

Fixes: 0b10a370529c ("iomap: support T10 protection information")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exfat/iomap.c      |  5 +----
 fs/iomap/bio.c        | 13 ++++++++++---
 fs/ntfs/aops.c        |  6 ++----
 fs/ntfs3/inode.c      |  5 +----
 fs/xfs/xfs_aops.c     |  3 +--
 include/linux/iomap.h |  2 ++
 6 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
index 1aac38e63fe6..190fc6471f84 100644
--- a/fs/exfat/iomap.c
+++ b/fs/exfat/iomap.c
@@ -253,10 +253,7 @@ static void exfat_iomap_read_end_io(struct bio *bio)
 static void exfat_iomap_bio_submit_read(const struct iomap_iter *iter,
 		struct iomap_read_folio_ctx *ctx)
 {
-	struct bio *bio = ctx->read_ctx;
-
-	bio->bi_end_io = exfat_iomap_read_end_io;
-	submit_bio(bio);
+	iomap_bio_submit_read_endio(iter, ctx, exfat_iomap_read_end_io);
 }
 
 const struct iomap_read_ops exfat_iomap_bio_read_ops = {
diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
index 4504f4633f17..0f31e35567b4 100644
--- a/fs/iomap/bio.c
+++ b/fs/iomap/bio.c
@@ -78,15 +78,23 @@ u32 iomap_finish_ioend_buffered_read(struct iomap_ioend *ioend)
 	return __iomap_read_end_io(&ioend->io_bio, ioend->io_error);
 }
 
-static void iomap_bio_submit_read(const struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx)
+void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx, bio_end_io_t end_io)
 {
 	struct bio *bio = ctx->read_ctx;
 
+	bio->bi_end_io = end_io;
 	if (iter->iomap.flags & IOMAP_F_INTEGRITY)
 		fs_bio_integrity_alloc(bio);
 	submit_bio(bio);
 }
+EXPORT_SYMBOL_GPL(iomap_bio_submit_read_endio);
+
+static void iomap_bio_submit_read(const struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx)
+{
+	return iomap_bio_submit_read_endio(iter, ctx, iomap_read_end_io);
+}
 
 static struct bio_set *iomap_read_bio_set(struct iomap_read_folio_ctx *ctx)
 {
@@ -127,7 +135,6 @@ static void iomap_read_alloc_bio(const struct iomap_iter *iter,
 	if (ctx->rac)
 		bio->bi_opf |= REQ_RAHEAD;
 	bio->bi_iter.bi_sector = iomap_sector(iomap, iter->pos);
-	bio->bi_end_io = iomap_read_end_io;
 	bio_add_folio_nofail(bio, folio, plen,
 			offset_in_folio(folio, iter->pos));
 	ctx->read_ctx = bio;
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 1fbf832ad165..f2bb56506046 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -38,11 +38,9 @@ static void ntfs_iomap_read_end_io(struct bio *bio)
 }
 
 static void ntfs_iomap_bio_submit_read(const struct iomap_iter *iter,
-	struct iomap_read_folio_ctx *ctx)
+		struct iomap_read_folio_ctx *ctx)
 {
-	struct bio *bio = ctx->read_ctx;
-	bio->bi_end_io = ntfs_iomap_read_end_io;
-	submit_bio(bio);
+	iomap_bio_submit_read_endio(iter, ctx, ntfs_iomap_read_end_io);
 }
 
 static const struct iomap_read_ops ntfs_iomap_bio_read_ops = {
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 42af1abe17f8..f9600aba1548 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -609,10 +609,7 @@ static void ntfs_iomap_read_end_io(struct bio *bio)
 static void ntfs_iomap_bio_submit_read(const struct iomap_iter *iter,
 		struct iomap_read_folio_ctx *ctx)
 {
-	struct bio *bio = ctx->read_ctx;
-
-	bio->bi_end_io = ntfs_iomap_read_end_io;
-	submit_bio(bio);
+	iomap_bio_submit_read_endio(iter, ctx, ntfs_iomap_read_end_io);
 }
 
 static const struct iomap_read_ops ntfs_iomap_bio_read_ops = {
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 2a0c54256e93..51293b6f331f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -764,8 +764,7 @@ xfs_bio_submit_read(
 
 	/* defer read completions to the ioend workqueue */
 	iomap_init_ioend(iter->inode, bio, ctx->read_ctx_file_offset, 0);
-	bio->bi_end_io = xfs_end_bio;
-	submit_bio(bio);
+	iomap_bio_submit_read_endio(iter, ctx, xfs_end_bio);
 }
 
 static const struct iomap_read_ops xfs_iomap_read_ops = {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3582ed1fe236..56b43d594e6e 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -622,6 +622,8 @@ extern struct bio_set iomap_ioend_bioset;
 #ifdef CONFIG_BLOCK
 int iomap_bio_read_folio_range(const struct iomap_iter *iter,
 		struct iomap_read_folio_ctx *ctx, size_t plen);
+void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx, bio_end_io_t end_io);
 
 extern const struct iomap_read_ops iomap_bio_read_ops;
 
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] iomap: submit read bio after each extent
  2026-06-23 13:51 don't build bios/contexts over multiple iomaps v2 Christoph Hellwig
  2026-06-23 13:51 ` [PATCH 1/2] iomap: consolidate bio submission Christoph Hellwig
@ 2026-06-23 13:51 ` Christoph Hellwig
  2026-06-23 17:29   ` Joanne Koong
                     ` (2 more replies)
  1 sibling, 3 replies; 9+ messages in thread
From: Christoph Hellwig @ 2026-06-23 13:51 UTC (permalink / raw)
  To: Christian Brauner, Darrick J. Wong
  Cc: Kelu Ye, Yifan Zhao, Ritesh Harjani, Joanne Koong, Namjae Jeon,
	Sungjong Seo, Hyunchul Lee, Konstantin Komarov, Miklos Szeredi,
	fuse-devel, ntfs3, linux-erofs, linux-xfs, linux-fsdevel

Currently the iomap buffered read path tries to build up read context
(i.e. bios for the typical block based case) over multiple iomaps as
long as the sector matches.  This does not take into account files
that can map to multiple different devices.  While this could be fixed
by a bdev check in iomap_bio_read_folio_range, the building up of I/O
over iomaps actually was a problem for the not yet merged ext2 iomap
port, as that does want to send out I/O at the end of an indirect
block mapped range.

So instead of adding more checks move over to a model where a bio only
spans a single iomap.  Change ->submit_read to be called after each
iteration, and pass a force argument, set on the last iteration, to
indicate that the bio must be submitted.  Switch the bio based users
to always submit, while keeping the single submit for fuse.

Fixes: dfeab2e95a75 ("erofs: add multiple device support")
Reported-by: Kelu Ye <yekelu1@huawei.com>
Reported-by: Yifan Zhao <zhaoyifan28@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/exfat/iomap.c       |  4 ++--
 fs/fuse/file.c         |  6 +++++-
 fs/iomap/bio.c         | 11 +++++++----
 fs/iomap/buffered-io.c | 23 +++++++++++++++--------
 fs/ntfs/aops.c         |  4 ++--
 fs/ntfs3/inode.c       |  4 ++--
 fs/xfs/xfs_aops.c      |  5 +++--
 include/linux/iomap.h  |  5 +++--
 8 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
index 190fc6471f84..58e25c4e8587 100644
--- a/fs/exfat/iomap.c
+++ b/fs/exfat/iomap.c
@@ -251,9 +251,9 @@ static void exfat_iomap_read_end_io(struct bio *bio)
 }
 
 static void exfat_iomap_bio_submit_read(const struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx)
+		struct iomap_read_folio_ctx *ctx, bool force)
 {
-	iomap_bio_submit_read_endio(iter, ctx, exfat_iomap_read_end_io);
+	iomap_bio_submit_read_endio(iter, ctx, force, exfat_iomap_read_end_io);
 }
 
 const struct iomap_read_ops exfat_iomap_bio_read_ops = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e052a0d44dee..6fa3b1f55c95 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -982,13 +982,17 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
 }
 
 static void fuse_iomap_submit_read(const struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx)
+		struct iomap_read_folio_ctx *ctx, bool force)
 {
 	struct fuse_fill_read_data *data = ctx->read_ctx;
 
+	if (!force)
+		return;
+
 	if (data->ia)
 		fuse_send_readpages(data->ia, data->file, data->nr_bytes,
 				    data->fc->async_read);
+	ctx->read_ctx = NULL;
 }
 
 static const struct iomap_read_ops fuse_iomap_read_ops = {
diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
index 0f31e35567b4..f71aaaf60301 100644
--- a/fs/iomap/bio.c
+++ b/fs/iomap/bio.c
@@ -79,7 +79,8 @@ u32 iomap_finish_ioend_buffered_read(struct iomap_ioend *ioend)
 }
 
 void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx, bio_end_io_t end_io)
+		struct iomap_read_folio_ctx *ctx, bool force,
+		bio_end_io_t end_io)
 {
 	struct bio *bio = ctx->read_ctx;
 
@@ -87,13 +88,15 @@ void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
 	if (iter->iomap.flags & IOMAP_F_INTEGRITY)
 		fs_bio_integrity_alloc(bio);
 	submit_bio(bio);
+
+	ctx->read_ctx = NULL;
 }
 EXPORT_SYMBOL_GPL(iomap_bio_submit_read_endio);
 
 static void iomap_bio_submit_read(const struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx)
+		struct iomap_read_folio_ctx *ctx, bool force)
 {
-	return iomap_bio_submit_read_endio(iter, ctx, iomap_read_end_io);
+	return iomap_bio_submit_read_endio(iter, ctx, force, iomap_read_end_io);
 }
 
 static struct bio_set *iomap_read_bio_set(struct iomap_read_folio_ctx *ctx)
@@ -116,7 +119,7 @@ static void iomap_read_alloc_bio(const struct iomap_iter *iter,
 
 	/* Submit the existing range if there was one. */
 	if (ctx->read_ctx)
-		ctx->ops->submit_read(iter, ctx);
+		ctx->ops->submit_read(iter, ctx, true);
 
 	/* Same as readahead_gfp_mask: */
 	if (ctx->rac)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 8d4806dc46d4..06a216d37548 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -524,6 +524,13 @@ static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
 	}
 }
 
+static void iomap_submit_read(struct iomap_iter *iter,
+		struct iomap_read_folio_ctx *ctx, bool force)
+{
+	if (ctx->read_ctx && ctx->ops->submit_read)
+		ctx->ops->submit_read(iter, ctx, force);
+}
+
 static int iomap_read_folio_iter(struct iomap_iter *iter,
 		struct iomap_read_folio_ctx *ctx, size_t *bytes_submitted)
 {
@@ -642,12 +649,12 @@ void iomap_read_folio(const struct iomap_ops *ops,
 		fsverity_readahead(ctx->vi, folio->index,
 				   folio_nr_pages(folio));
 
-	while ((ret = iomap_iter(&iter, ops)) > 0)
+	while ((ret = iomap_iter(&iter, ops)) > 0) {
+		iomap_submit_read(&iter, ctx, false);
 		iter.status = iomap_read_folio_iter(&iter, ctx,
 				&bytes_submitted);
-
-	if (ctx->read_ctx && ctx->ops->submit_read)
-		ctx->ops->submit_read(&iter, ctx);
+	}
+	iomap_submit_read(&iter, ctx, true);
 
 	if (ctx->cur_folio)
 		iomap_read_end(ctx->cur_folio, bytes_submitted);
@@ -718,12 +725,12 @@ void iomap_readahead(const struct iomap_ops *ops,
 		fsverity_readahead(ctx->vi, readahead_index(rac),
 				readahead_count(rac));
 
-	while (iomap_iter(&iter, ops) > 0)
+	while (iomap_iter(&iter, ops) > 0) {
+		iomap_submit_read(&iter, ctx, false);
 		iter.status = iomap_readahead_iter(&iter, ctx,
 					&cur_bytes_submitted);
-
-	if (ctx->read_ctx && ctx->ops->submit_read)
-		ctx->ops->submit_read(&iter, ctx);
+	}
+	iomap_submit_read(&iter, ctx, true);
 
 	if (ctx->cur_folio)
 		iomap_read_end(ctx->cur_folio, cur_bytes_submitted);
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index f2bb56506046..c32ecc28cb52 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -38,9 +38,9 @@ static void ntfs_iomap_read_end_io(struct bio *bio)
 }
 
 static void ntfs_iomap_bio_submit_read(const struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx)
+		struct iomap_read_folio_ctx *ctx, bool force)
 {
-	iomap_bio_submit_read_endio(iter, ctx, ntfs_iomap_read_end_io);
+	iomap_bio_submit_read_endio(iter, ctx, force, ntfs_iomap_read_end_io);
 }
 
 static const struct iomap_read_ops ntfs_iomap_bio_read_ops = {
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index f9600aba1548..110c9b8208e1 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -607,9 +607,9 @@ static void ntfs_iomap_read_end_io(struct bio *bio)
 }
 
 static void ntfs_iomap_bio_submit_read(const struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx)
+		struct iomap_read_folio_ctx *ctx, bool force)
 {
-	iomap_bio_submit_read_endio(iter, ctx, ntfs_iomap_read_end_io);
+	iomap_bio_submit_read_endio(iter, ctx, force, ntfs_iomap_read_end_io);
 }
 
 static const struct iomap_read_ops ntfs_iomap_bio_read_ops = {
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 51293b6f331f..42ebb2265408 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -758,13 +758,14 @@ xfs_vm_bmap(
 static void
 xfs_bio_submit_read(
 	const struct iomap_iter		*iter,
-	struct iomap_read_folio_ctx	*ctx)
+	struct iomap_read_folio_ctx	*ctx,
+	bool				force)
 {
 	struct bio			*bio = ctx->read_ctx;
 
 	/* defer read completions to the ioend workqueue */
 	iomap_init_ioend(iter->inode, bio, ctx->read_ctx_file_offset, 0);
-	iomap_bio_submit_read_endio(iter, ctx, xfs_end_bio);
+	iomap_bio_submit_read_endio(iter, ctx, force, xfs_end_bio);
 }
 
 static const struct iomap_read_ops xfs_iomap_read_ops = {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 56b43d594e6e..266844b62372 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -528,7 +528,7 @@ struct iomap_read_ops {
 	 * This is optional.
 	 */
 	void (*submit_read)(const struct iomap_iter *iter,
-			struct iomap_read_folio_ctx *ctx);
+			struct iomap_read_folio_ctx *ctx, bool force);
 
 	/*
 	 * Optional, allows filesystem to specify own bio_set, so new bio's
@@ -623,7 +623,8 @@ extern struct bio_set iomap_ioend_bioset;
 int iomap_bio_read_folio_range(const struct iomap_iter *iter,
 		struct iomap_read_folio_ctx *ctx, size_t plen);
 void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx, bio_end_io_t end_io);
+		struct iomap_read_folio_ctx *ctx, bool force,
+		bio_end_io_t end_io);
 
 extern const struct iomap_read_ops iomap_bio_read_ops;
 
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] iomap: consolidate bio submission
  2026-06-23 13:51 ` [PATCH 1/2] iomap: consolidate bio submission Christoph Hellwig
@ 2026-06-23 17:04   ` Joanne Koong
  2026-06-23 23:57   ` Namjae Jeon
  1 sibling, 0 replies; 9+ messages in thread
From: Joanne Koong @ 2026-06-23 17:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Christian Brauner, Darrick J. Wong, Kelu Ye, Yifan Zhao,
	Ritesh Harjani, Namjae Jeon, Sungjong Seo, Hyunchul Lee,
	Konstantin Komarov, Miklos Szeredi, fuse-devel, ntfs3,
	linux-erofs, linux-xfs, linux-fsdevel

On Tue, Jun 23, 2026 at 6:52 AM Christoph Hellwig <hch@lst.de> wrote:
>
> Add a iomap_bio_submit_read_endio helper factored out of
> iomap_bio_submit_read to that all ->submit_read implementations for
> iomap_read_ops that use iomap_bio_read_folio_range can shared the
> logic.
>
> Right now that logic is mostly trivial, but already has a bug for XFS
> because the XFS version is too trivial:  file system integrity validation
> needs a workqueue context and thus can't happen from the default iomap
> bi_end_io I/O handler.  Unfortunately the iomap refactoring just before
> fs integrity landed moved code around here and the call go misplaced,
> meaning it never got called.  The PI information still is verified by
> the block layer, but the offloading is less efficient (and the future
> userspace interface can't get at it).
>
> Fixes: 0b10a370529c ("iomap: support T10 protection information")
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Joanne Koong <joannelkoong@gmail.com>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] iomap: submit read bio after each extent
  2026-06-23 13:51 ` [PATCH 2/2] iomap: submit read bio after each extent Christoph Hellwig
@ 2026-06-23 17:29   ` Joanne Koong
  2026-06-24  7:34     ` Christoph Hellwig
  2026-06-23 23:58   ` Namjae Jeon
  2026-06-24  7:42   ` zhaoyifan (H)
  2 siblings, 1 reply; 9+ messages in thread
From: Joanne Koong @ 2026-06-23 17:29 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Christian Brauner, Darrick J. Wong, Kelu Ye, Yifan Zhao,
	Ritesh Harjani, Namjae Jeon, Sungjong Seo, Hyunchul Lee,
	Konstantin Komarov, Miklos Szeredi, fuse-devel, ntfs3,
	linux-erofs, linux-xfs, linux-fsdevel

On Tue, Jun 23, 2026 at 6:52 AM Christoph Hellwig <hch@lst.de> wrote:
>
> Currently the iomap buffered read path tries to build up read context
> (i.e. bios for the typical block based case) over multiple iomaps as
> long as the sector matches.  This does not take into account files
> that can map to multiple different devices.  While this could be fixed
> by a bdev check in iomap_bio_read_folio_range, the building up of I/O
> over iomaps actually was a problem for the not yet merged ext2 iomap
> port, as that does want to send out I/O at the end of an indirect
> block mapped range.
>
> So instead of adding more checks move over to a model where a bio only
> spans a single iomap.  Change ->submit_read to be called after each
> iteration, and pass a force argument to indicate that the bio must
> be submitted set on the last iteration.  Switch the bio based users
> to always submit, while keeping the single submit for fuse.
>
> Fixes: dfeab2e95a75 ("erofs: add multiple device support")
> Reported-by: Kelu Ye <yekelu1@huawei.com>
> Reported-by: Yifan Zhao <zhaoyifan28@huawei.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/exfat/iomap.c       |  4 ++--
>  fs/fuse/file.c         |  6 +++++-
>  fs/iomap/bio.c         | 11 +++++++----
>  fs/iomap/buffered-io.c | 23 +++++++++++++++--------
>  fs/ntfs/aops.c         |  4 ++--
>  fs/ntfs3/inode.c       |  4 ++--
>  fs/xfs/xfs_aops.c      |  5 +++--
>  include/linux/iomap.h  |  5 +++--
>  8 files changed, 39 insertions(+), 23 deletions(-)
>
> diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
> index 0f31e35567b4..f71aaaf60301 100644
> --- a/fs/iomap/bio.c
> +++ b/fs/iomap/bio.c
> @@ -79,7 +79,8 @@ u32 iomap_finish_ioend_buffered_read(struct iomap_ioend *ioend)
>  }
>
>  void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
> -               struct iomap_read_folio_ctx *ctx, bio_end_io_t end_io)
> +               struct iomap_read_folio_ctx *ctx, bool force,

nit: might simplify things to drop the unused force arg

> +               bio_end_io_t end_io)
>  {
>         struct bio *bio = ctx->read_ctx;
>
> @@ -87,13 +88,15 @@ void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
>         if (iter->iomap.flags & IOMAP_F_INTEGRITY)
>                 fs_bio_integrity_alloc(bio);
>         submit_bio(bio);
> +
> +       ctx->read_ctx = NULL;
>  }
>  EXPORT_SYMBOL_GPL(iomap_bio_submit_read_endio);
>
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 8d4806dc46d4..06a216d37548 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
>
> @@ -642,12 +649,12 @@ void iomap_read_folio(const struct iomap_ops *ops,
>                 fsverity_readahead(ctx->vi, folio->index,
>                                    folio_nr_pages(folio));
>
> -       while ((ret = iomap_iter(&iter, ops)) > 0)
> +       while ((ret = iomap_iter(&iter, ops)) > 0) {
> +               iomap_submit_read(&iter, ctx, false);
>                 iter.status = iomap_read_folio_iter(&iter, ctx,
>                                 &bytes_submitted);

should the submit_read happen after the iomap_read_folio_iter() /
iomap_readahead_iter() instead of before? From what I see, it looks
like iomap_submit_read() would hold the iter state of the next
mapping. It seems like in iomap_bio_submit_read_endio(), the
iter->iomap.flags would be the next extent's flags instead of the one
that needs to be submitted?

Thanks,
Joanne

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] iomap: consolidate bio submission
  2026-06-23 13:51 ` [PATCH 1/2] iomap: consolidate bio submission Christoph Hellwig
  2026-06-23 17:04   ` Joanne Koong
@ 2026-06-23 23:57   ` Namjae Jeon
  1 sibling, 0 replies; 9+ messages in thread
From: Namjae Jeon @ 2026-06-23 23:57 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Christian Brauner, Darrick J. Wong, Kelu Ye, Yifan Zhao,
	Ritesh Harjani, Joanne Koong, Sungjong Seo, Hyunchul Lee,
	Konstantin Komarov, Miklos Szeredi, fuse-devel, ntfs3,
	linux-erofs, linux-xfs, linux-fsdevel

On Tue, Jun 23, 2026 at 10:52 PM Christoph Hellwig <hch@lst.de> wrote:
>
> Add a iomap_bio_submit_read_endio helper factored out of
> iomap_bio_submit_read to that all ->submit_read implementations for
> iomap_read_ops that use iomap_bio_read_folio_range can shared the
> logic.
>
> Right now that logic is mostly trivial, but already has a bug for XFS
> because the XFS version is too trivial:  file system integrity validation
> needs a workqueue context and thus can't happen from the default iomap
> bi_end_io I/O handler.  Unfortunately the iomap refactoring just before
> fs integrity landed moved code around here and the call go misplaced,
> meaning it never got called.  The PI information still is verified by
> the block layer, but the offloading is less efficient (and the future
> userspace interface can't get at it).
>
> Fixes: 0b10a370529c ("iomap: support T10 protection information")
> Signed-off-by: Christoph Hellwig <hch@lst.de>
for ntfs, exfat part.
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Thanks!

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] iomap: submit read bio after each extent
  2026-06-23 13:51 ` [PATCH 2/2] iomap: submit read bio after each extent Christoph Hellwig
  2026-06-23 17:29   ` Joanne Koong
@ 2026-06-23 23:58   ` Namjae Jeon
  2026-06-24  7:42   ` zhaoyifan (H)
  2 siblings, 0 replies; 9+ messages in thread
From: Namjae Jeon @ 2026-06-23 23:58 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Christian Brauner, Darrick J. Wong, Kelu Ye, Yifan Zhao,
	Ritesh Harjani, Joanne Koong, Sungjong Seo, Hyunchul Lee,
	Konstantin Komarov, Miklos Szeredi, fuse-devel, ntfs3,
	linux-erofs, linux-xfs, linux-fsdevel

On Tue, Jun 23, 2026 at 10:52 PM Christoph Hellwig <hch@lst.de> wrote:
>
> Currently the iomap buffered read path tries to build up read context
> (i.e. bios for the typical block based case) over multiple iomaps as
> long as the sector matches.  This does not take into account files
> that can map to multiple different devices.  While this could be fixed
> by a bdev check in iomap_bio_read_folio_range, the building up of I/O
> over iomaps actually was a problem for the not yet merged ext2 iomap
> port, as that does want to send out I/O at the end of an indirect
> block mapped range.
>
> So instead of adding more checks move over to a model where a bio only
> spans a single iomap.  Change ->submit_read to be called after each
> iteration, and pass a force argument to indicate that the bio must
> be submitted set on the last iteration.  Switch the bio based users
> to always submit, while keeping the single submit for fuse.
>
> Fixes: dfeab2e95a75 ("erofs: add multiple device support")
> Reported-by: Kelu Ye <yekelu1@huawei.com>
> Reported-by: Yifan Zhao <zhaoyifan28@huawei.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
for ntfs, exfat part.
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Thanks!


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] iomap: submit read bio after each extent
  2026-06-23 17:29   ` Joanne Koong
@ 2026-06-24  7:34     ` Christoph Hellwig
  0 siblings, 0 replies; 9+ messages in thread
From: Christoph Hellwig @ 2026-06-24  7:34 UTC (permalink / raw)
  To: Joanne Koong
  Cc: Christoph Hellwig, Christian Brauner, Darrick J. Wong, Kelu Ye,
	Yifan Zhao, Ritesh Harjani, Namjae Jeon, Sungjong Seo,
	Hyunchul Lee, Konstantin Komarov, Miklos Szeredi, fuse-devel,
	ntfs3, linux-erofs, linux-xfs, linux-fsdevel

On Tue, Jun 23, 2026 at 10:29:54AM -0700, Joanne Koong wrote:
> >  void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
> > -               struct iomap_read_folio_ctx *ctx, bio_end_io_t end_io)
> > +               struct iomap_read_folio_ctx *ctx, bool force,
> 
> nit: might simplify things to drop the unused force arg

I guess this is not directly used as a method, so we could.

> >
> > -       while ((ret = iomap_iter(&iter, ops)) > 0)
> > +       while ((ret = iomap_iter(&iter, ops)) > 0) {
> > +               iomap_submit_read(&iter, ctx, false);
> >                 iter.status = iomap_read_folio_iter(&iter, ctx,
> >                                 &bytes_submitted);
> 
> should the submit_read happen after the iomap_read_folio_iter() /
> iomap_readahead_iter() instaed of before? From what I see, it looks
> like iomap_submit_read() would hold the iter state of the next
> mapping. It seems like in iomap_bio_submit_read_endio(), the
> iter->iomap.flags would be the next extent's flags instead of the one
> that needs to be submitted?

Yeah, the iter state would be wrong here if anyone actually used it.
But the only thing we actually ever use from it is the inode in XFS.

So I'm tempted to instead just adapt the signature to not pass the
iter, as nothing should rely on it.  The only interesting thing I could
think of for the future would be to pass on private data, but that's
probably better left for when we actually need it.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] iomap: submit read bio after each extent
  2026-06-23 13:51 ` [PATCH 2/2] iomap: submit read bio after each extent Christoph Hellwig
  2026-06-23 17:29   ` Joanne Koong
  2026-06-23 23:58   ` Namjae Jeon
@ 2026-06-24  7:42   ` zhaoyifan (H)
  2 siblings, 0 replies; 9+ messages in thread
From: zhaoyifan (H) @ 2026-06-24  7:42 UTC (permalink / raw)
  To: Christoph Hellwig, Christian Brauner, Darrick J. Wong
  Cc: Kelu Ye, Ritesh Harjani, Joanne Koong, Namjae Jeon, Sungjong Seo,
	Hyunchul Lee, Konstantin Komarov, Miklos Szeredi, fuse-devel,
	ntfs3, linux-erofs, linux-xfs, linux-fsdevel

The issue where EROFS could merge bios across devices when using the
iomap API no longer exists.

Tested-by: Yifan Zhao <zhaoyifan28@huawei.com>

On 2026/6/23 21:51, Christoph Hellwig wrote:
> Currently the iomap buffered read path tries to build up read context
> (i.e. bios for the typical block based case) over multiple iomaps as
> long as the sector matches.  This does not take into account files
> that can map to multiple different devices.  While this could be fixed
> by a bdev check in iomap_bio_read_folio_range, the building up of I/O
> over iomaps actually was a problem for the not yet merged ext2 iomap
> port, as that does want to send out I/O at the end of an indirect
> block mapped range.
>
> So instead of adding more checks move over to a model where a bio only
> spans a single iomap.  Change ->submit_read to be called after each
> iteration, and pass a force argument, set on the last iteration, to
> indicate that the bio must be submitted.  Switch the bio based users
> to always submit, while keeping the single submit for fuse.
>
> Fixes: dfeab2e95a75 ("erofs: add multiple device support")
> Reported-by: Kelu Ye <yekelu1@huawei.com>
> Reported-by: Yifan Zhao <zhaoyifan28@huawei.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/exfat/iomap.c       |  4 ++--
>   fs/fuse/file.c         |  6 +++++-
>   fs/iomap/bio.c         | 11 +++++++----
>   fs/iomap/buffered-io.c | 23 +++++++++++++++--------
>   fs/ntfs/aops.c         |  4 ++--
>   fs/ntfs3/inode.c       |  4 ++--
>   fs/xfs/xfs_aops.c      |  5 +++--
>   include/linux/iomap.h  |  5 +++--
>   8 files changed, 39 insertions(+), 23 deletions(-)
>
> diff --git a/fs/exfat/iomap.c b/fs/exfat/iomap.c
> index 190fc6471f84..58e25c4e8587 100644
> --- a/fs/exfat/iomap.c
> +++ b/fs/exfat/iomap.c
> @@ -251,9 +251,9 @@ static void exfat_iomap_read_end_io(struct bio *bio)
>   }
>   
>   static void exfat_iomap_bio_submit_read(const struct iomap_iter *iter,
> -		struct iomap_read_folio_ctx *ctx)
> +		struct iomap_read_folio_ctx *ctx, bool force)
>   {
> -	iomap_bio_submit_read_endio(iter, ctx, exfat_iomap_read_end_io);
> +	iomap_bio_submit_read_endio(iter, ctx, force, exfat_iomap_read_end_io);
>   }
>   
>   const struct iomap_read_ops exfat_iomap_bio_read_ops = {
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index e052a0d44dee..6fa3b1f55c95 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -982,13 +982,17 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
>   }
>   
>   static void fuse_iomap_submit_read(const struct iomap_iter *iter,
> -		struct iomap_read_folio_ctx *ctx)
> +		struct iomap_read_folio_ctx *ctx, bool force)
>   {
>   	struct fuse_fill_read_data *data = ctx->read_ctx;
>   
> +	if (!force)
> +		return;
> +
>   	if (data->ia)
>   		fuse_send_readpages(data->ia, data->file, data->nr_bytes,
>   				    data->fc->async_read);
> +	ctx->read_ctx = NULL;
>   }
>   
>   static const struct iomap_read_ops fuse_iomap_read_ops = {
> diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
> index 0f31e35567b4..f71aaaf60301 100644
> --- a/fs/iomap/bio.c
> +++ b/fs/iomap/bio.c
> @@ -79,7 +79,8 @@ u32 iomap_finish_ioend_buffered_read(struct iomap_ioend *ioend)
>   }
>   
>   void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
> -		struct iomap_read_folio_ctx *ctx, bio_end_io_t end_io)
> +		struct iomap_read_folio_ctx *ctx, bool force,
> +		bio_end_io_t end_io)
>   {
>   	struct bio *bio = ctx->read_ctx;
>   
> @@ -87,13 +88,15 @@ void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
>   	if (iter->iomap.flags & IOMAP_F_INTEGRITY)
>   		fs_bio_integrity_alloc(bio);
>   	submit_bio(bio);
> +
> +	ctx->read_ctx = NULL;
>   }
>   EXPORT_SYMBOL_GPL(iomap_bio_submit_read_endio);
>   
>   static void iomap_bio_submit_read(const struct iomap_iter *iter,
> -		struct iomap_read_folio_ctx *ctx)
> +		struct iomap_read_folio_ctx *ctx, bool force)
>   {
> -	return iomap_bio_submit_read_endio(iter, ctx, iomap_read_end_io);
> +	return iomap_bio_submit_read_endio(iter, ctx, force, iomap_read_end_io);
>   }
>   
>   static struct bio_set *iomap_read_bio_set(struct iomap_read_folio_ctx *ctx)
> @@ -116,7 +119,7 @@ static void iomap_read_alloc_bio(const struct iomap_iter *iter,
>   
>   	/* Submit the existing range if there was one. */
>   	if (ctx->read_ctx)
> -		ctx->ops->submit_read(iter, ctx);
> +		ctx->ops->submit_read(iter, ctx, true);
>   
>   	/* Same as readahead_gfp_mask: */
>   	if (ctx->rac)
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 8d4806dc46d4..06a216d37548 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -524,6 +524,13 @@ static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
>   	}
>   }
>   
> +static void iomap_submit_read(struct iomap_iter *iter,
> +		struct iomap_read_folio_ctx *ctx, bool force)
> +{
> +	if (ctx->read_ctx && ctx->ops->submit_read)
> +		ctx->ops->submit_read(iter, ctx, force);
> +}
> +
>   static int iomap_read_folio_iter(struct iomap_iter *iter,
>   		struct iomap_read_folio_ctx *ctx, size_t *bytes_submitted)
>   {
> @@ -642,12 +649,12 @@ void iomap_read_folio(const struct iomap_ops *ops,
>   		fsverity_readahead(ctx->vi, folio->index,
>   				   folio_nr_pages(folio));
>   
> -	while ((ret = iomap_iter(&iter, ops)) > 0)
> +	while ((ret = iomap_iter(&iter, ops)) > 0) {
> +		iomap_submit_read(&iter, ctx, false);
>   		iter.status = iomap_read_folio_iter(&iter, ctx,
>   				&bytes_submitted);
> -
> -	if (ctx->read_ctx && ctx->ops->submit_read)
> -		ctx->ops->submit_read(&iter, ctx);
> +	}
> +	iomap_submit_read(&iter, ctx, true);
>   
>   	if (ctx->cur_folio)
>   		iomap_read_end(ctx->cur_folio, bytes_submitted);
> @@ -718,12 +725,12 @@ void iomap_readahead(const struct iomap_ops *ops,
>   		fsverity_readahead(ctx->vi, readahead_index(rac),
>   				readahead_count(rac));
>   
> -	while (iomap_iter(&iter, ops) > 0)
> +	while (iomap_iter(&iter, ops) > 0) {
> +		iomap_submit_read(&iter, ctx, false);
>   		iter.status = iomap_readahead_iter(&iter, ctx,
>   					&cur_bytes_submitted);
> -
> -	if (ctx->read_ctx && ctx->ops->submit_read)
> -		ctx->ops->submit_read(&iter, ctx);
> +	}
> +	iomap_submit_read(&iter, ctx, true);
>   
>   	if (ctx->cur_folio)
>   		iomap_read_end(ctx->cur_folio, cur_bytes_submitted);
> diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
> index f2bb56506046..c32ecc28cb52 100644
> --- a/fs/ntfs/aops.c
> +++ b/fs/ntfs/aops.c
> @@ -38,9 +38,9 @@ static void ntfs_iomap_read_end_io(struct bio *bio)
>   }
>   
>   static void ntfs_iomap_bio_submit_read(const struct iomap_iter *iter,
> -		struct iomap_read_folio_ctx *ctx)
> +		struct iomap_read_folio_ctx *ctx, bool force)
>   {
> -	iomap_bio_submit_read_endio(iter, ctx, ntfs_iomap_read_end_io);
> +	iomap_bio_submit_read_endio(iter, ctx, force, ntfs_iomap_read_end_io);
>   }
>   
>   static const struct iomap_read_ops ntfs_iomap_bio_read_ops = {
> diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
> index f9600aba1548..110c9b8208e1 100644
> --- a/fs/ntfs3/inode.c
> +++ b/fs/ntfs3/inode.c
> @@ -607,9 +607,9 @@ static void ntfs_iomap_read_end_io(struct bio *bio)
>   }
>   
>   static void ntfs_iomap_bio_submit_read(const struct iomap_iter *iter,
> -		struct iomap_read_folio_ctx *ctx)
> +		struct iomap_read_folio_ctx *ctx, bool force)
>   {
> -	iomap_bio_submit_read_endio(iter, ctx, ntfs_iomap_read_end_io);
> +	iomap_bio_submit_read_endio(iter, ctx, force, ntfs_iomap_read_end_io);
>   }
>   
>   static const struct iomap_read_ops ntfs_iomap_bio_read_ops = {
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 51293b6f331f..42ebb2265408 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -758,13 +758,14 @@ xfs_vm_bmap(
>   static void
>   xfs_bio_submit_read(
>   	const struct iomap_iter		*iter,
> -	struct iomap_read_folio_ctx	*ctx)
> +	struct iomap_read_folio_ctx	*ctx,
> +	bool				force)
>   {
>   	struct bio			*bio = ctx->read_ctx;
>   
>   	/* defer read completions to the ioend workqueue */
>   	iomap_init_ioend(iter->inode, bio, ctx->read_ctx_file_offset, 0);
> -	iomap_bio_submit_read_endio(iter, ctx, xfs_end_bio);
> +	iomap_bio_submit_read_endio(iter, ctx, force, xfs_end_bio);
>   }
>   
>   static const struct iomap_read_ops xfs_iomap_read_ops = {
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 56b43d594e6e..266844b62372 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -528,7 +528,7 @@ struct iomap_read_ops {
>   	 * This is optional.
>   	 */
>   	void (*submit_read)(const struct iomap_iter *iter,
> -			struct iomap_read_folio_ctx *ctx);
> +			struct iomap_read_folio_ctx *ctx, bool force);
>   
>   	/*
>   	 * Optional, allows filesystem to specify own bio_set, so new bio's
> @@ -623,7 +623,8 @@ extern struct bio_set iomap_ioend_bioset;
>   int iomap_bio_read_folio_range(const struct iomap_iter *iter,
>   		struct iomap_read_folio_ctx *ctx, size_t plen);
>   void iomap_bio_submit_read_endio(const struct iomap_iter *iter,
> -		struct iomap_read_folio_ctx *ctx, bio_end_io_t end_io);
> +		struct iomap_read_folio_ctx *ctx, bool force,
> +		bio_end_io_t end_io);
>   
>   extern const struct iomap_read_ops iomap_bio_read_ops;
>   

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2026-06-24  7:42 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-23 13:51 don't build bios/contexts over multiple iomaps v2 Christoph Hellwig
2026-06-23 13:51 ` [PATCH 1/2] iomap: consolidate bio submission Christoph Hellwig
2026-06-23 17:04   ` Joanne Koong
2026-06-23 23:57   ` Namjae Jeon
2026-06-23 13:51 ` [PATCH 2/2] iomap: submit read bio after each extent Christoph Hellwig
2026-06-23 17:29   ` Joanne Koong
2026-06-24  7:34     ` Christoph Hellwig
2026-06-23 23:58   ` Namjae Jeon
2026-06-24  7:42   ` zhaoyifan (H)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.