linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/2] vfs-6.19.iomap commit 51311f045375 fixups
@ 2025-10-27 18:12 Joanne Koong
  2025-10-27 18:12 ` [PATCH v2 1/2] iomap: rename bytes_pending/bytes_accounted to bytes_submitted/bytes_not_submitted Joanne Koong
  2025-10-27 18:12 ` [PATCH v2 2/2] iomap: fix race when reading in all bytes of a folio Joanne Koong
  0 siblings, 2 replies; 3+ messages in thread
From: Joanne Koong @ 2025-10-27 18:12 UTC (permalink / raw)
  To: brauner; +Cc: bfoster, hch, djwong, linux-fsdevel

These are two fixups for commit 51311f045375 ("iomap: track pending read
bytes more optimally") in the vfs-6.19.iomap branch. It would be great
if these could get folded into that original commit, if possible.

Thanks,
Joanne

Joanne Koong (2):
  iomap: rename bytes_pending/bytes_accounted to
    bytes_submitted/bytes_not_submitted
  iomap: fix race when reading in all bytes of a folio

 fs/iomap/buffered-io.c | 75 ++++++++++++++++++++++++++++++++----------
 1 file changed, 57 insertions(+), 18 deletions(-)

-- 
2.47.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2 1/2] iomap: rename bytes_pending/bytes_accounted to bytes_submitted/bytes_not_submitted
  2025-10-27 18:12 [PATCH v2 0/2] vfs-6.19.iomap commit 51311f045375 fixups Joanne Koong
@ 2025-10-27 18:12 ` Joanne Koong
  2025-10-27 18:12 ` [PATCH v2 2/2] iomap: fix race when reading in all bytes of a folio Joanne Koong
  1 sibling, 0 replies; 3+ messages in thread
From: Joanne Koong @ 2025-10-27 18:12 UTC (permalink / raw)
  To: brauner; +Cc: bfoster, hch, djwong, linux-fsdevel

As mentioned by Brian in [1], the naming "bytes_pending" and
"bytes_accounted" may be confusing and could be better named. Rename
these to "bytes_submitted" and "bytes_not_submitted" to make it more
clear that these are bytes we passed to the IO helper to read in.

[1] https://lore.kernel.org/linux-fsdevel/aPuz4Uop66-jRpN-@bfoster/

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 fs/iomap/buffered-io.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 72196e5021b1..4c0d66612a67 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -364,16 +364,16 @@ static void iomap_read_init(struct folio *folio)
 	}
 }
 
-static void iomap_read_end(struct folio *folio, size_t bytes_pending)
+static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
 {
 	struct iomap_folio_state *ifs;
 
 	/*
-	 * If there are no bytes pending, this means we are responsible for
+	 * If there are no bytes submitted, this means we are responsible for
 	 * unlocking the folio here, since no IO helper has taken ownership of
 	 * it.
 	 */
-	if (!bytes_pending) {
+	if (!bytes_submitted) {
 		folio_unlock(folio);
 		return;
 	}
@@ -381,10 +381,11 @@ static void iomap_read_end(struct folio *folio, size_t bytes_pending)
 	ifs = folio->private;
 	if (ifs) {
 		bool end_read, uptodate;
-		size_t bytes_accounted = folio_size(folio) - bytes_pending;
+		size_t bytes_not_submitted = folio_size(folio) -
+				bytes_submitted;
 
 		spin_lock_irq(&ifs->state_lock);
-		ifs->read_bytes_pending -= bytes_accounted;
+		ifs->read_bytes_pending -= bytes_not_submitted;
 		/*
 		 * If !ifs->read_bytes_pending, this means all pending reads
 		 * by the IO helper have already completed, which means we need
@@ -401,7 +402,7 @@ static void iomap_read_end(struct folio *folio, size_t bytes_pending)
 }
 
 static int iomap_read_folio_iter(struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx, size_t *bytes_pending)
+		struct iomap_read_folio_ctx *ctx, size_t *bytes_submitted)
 {
 	const struct iomap *iomap = &iter->iomap;
 	loff_t pos = iter->pos;
@@ -442,9 +443,9 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
 			folio_zero_range(folio, poff, plen);
 			iomap_set_range_uptodate(folio, poff, plen);
 		} else {
-			if (!*bytes_pending)
+			if (!*bytes_submitted)
 				iomap_read_init(folio);
-			*bytes_pending += plen;
+			*bytes_submitted += plen;
 			ret = ctx->ops->read_folio_range(iter, ctx, plen);
 			if (ret)
 				return ret;
@@ -468,39 +469,40 @@ void iomap_read_folio(const struct iomap_ops *ops,
 		.pos		= folio_pos(folio),
 		.len		= folio_size(folio),
 	};
-	size_t bytes_pending = 0;
+	size_t bytes_submitted = 0;
 	int ret;
 
 	trace_iomap_readpage(iter.inode, 1);
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.status = iomap_read_folio_iter(&iter, ctx, &bytes_pending);
+		iter.status = iomap_read_folio_iter(&iter, ctx,
+				&bytes_submitted);
 
 	if (ctx->ops->submit_read)
 		ctx->ops->submit_read(ctx);
 
-	iomap_read_end(folio, bytes_pending);
+	iomap_read_end(folio, bytes_submitted);
 }
 EXPORT_SYMBOL_GPL(iomap_read_folio);
 
 static int iomap_readahead_iter(struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_pending)
+		struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_submitted)
 {
 	int ret;
 
 	while (iomap_length(iter)) {
 		if (ctx->cur_folio &&
 		    offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
-			iomap_read_end(ctx->cur_folio, *cur_bytes_pending);
+			iomap_read_end(ctx->cur_folio, *cur_bytes_submitted);
 			ctx->cur_folio = NULL;
 		}
 		if (!ctx->cur_folio) {
 			ctx->cur_folio = readahead_folio(ctx->rac);
 			if (WARN_ON_ONCE(!ctx->cur_folio))
 				return -EINVAL;
-			*cur_bytes_pending = 0;
+			*cur_bytes_submitted = 0;
 		}
-		ret = iomap_read_folio_iter(iter, ctx, cur_bytes_pending);
+		ret = iomap_read_folio_iter(iter, ctx, cur_bytes_submitted);
 		if (ret)
 			return ret;
 	}
@@ -532,19 +534,19 @@ void iomap_readahead(const struct iomap_ops *ops,
 		.pos	= readahead_pos(rac),
 		.len	= readahead_length(rac),
 	};
-	size_t cur_bytes_pending;
+	size_t cur_bytes_submitted;
 
 	trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
 
 	while (iomap_iter(&iter, ops) > 0)
 		iter.status = iomap_readahead_iter(&iter, ctx,
-					&cur_bytes_pending);
+					&cur_bytes_submitted);
 
 	if (ctx->ops->submit_read)
 		ctx->ops->submit_read(ctx);
 
 	if (ctx->cur_folio)
-		iomap_read_end(ctx->cur_folio, cur_bytes_pending);
+		iomap_read_end(ctx->cur_folio, cur_bytes_submitted);
 }
 EXPORT_SYMBOL_GPL(iomap_readahead);
 
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2 2/2] iomap: fix race when reading in all bytes of a folio
  2025-10-27 18:12 [PATCH v2 0/2] vfs-6.19.iomap commit 51311f045375 fixups Joanne Koong
  2025-10-27 18:12 ` [PATCH v2 1/2] iomap: rename bytes_pending/bytes_accounted to bytes_submitted/bytes_not_submitted Joanne Koong
@ 2025-10-27 18:12 ` Joanne Koong
  1 sibling, 0 replies; 3+ messages in thread
From: Joanne Koong @ 2025-10-27 18:12 UTC (permalink / raw)
  To: brauner; +Cc: bfoster, hch, djwong, linux-fsdevel, Christoph Hellwig

There is a race where if all bytes in a folio need to get read in and
the filesystem finishes reading the bytes in before the call to
iomap_read_end(), then bytes_not_submitted in iomap_read_end() will be 0
and the following "ifs->read_bytes_pending -= bytes_not_submitted" will
also be 0 which will trigger an extra folio_end_read() call. This extra
folio_end_read() unlocks the folio for the 2nd time, which sets the lock
bit on the folio, resulting in a permanent lockup.

Fix this by returning from iomap_read_end() early if all bytes are read
in by the filesystem.

Additionally, add some comments to clarify how this logic works.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Fixes: 51311f045375 ("iomap: track pending read bytes more optimally")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reported-by: Brian Foster <bfoster@redhat.com>
---
 fs/iomap/buffered-io.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 4c0d66612a67..654c10a63dc1 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -358,12 +358,37 @@ static void iomap_read_init(struct folio *folio)
 	if (ifs) {
 		size_t len = folio_size(folio);
 
+		/*
+		 * ifs->read_bytes_pending is used to track how many bytes are
+		 * read in asynchronously by the IO helper. We need to track
+		 * this so that we can know when the IO helper has finished
+		 * reading in all the necessary ranges of the folio and can end
+		 * the read.
+		 *
+		 * Increase ->read_bytes_pending by the folio size to start.
+		 * We'll subtract uptodate/zeroed ranges that did not require
+		 * IO in iomap_read_end() after we're done processing the folio.
+		 * We do this because otherwise, we would have to increment
+		 * ifs->read_bytes_pending every time a range in the folio needs
+		 * to be read in, which can get expensive since the spinlock
+		 * needs to be held whenever modifying ifs->read_bytes_pending.
+		 */
 		spin_lock_irq(&ifs->state_lock);
 		ifs->read_bytes_pending += len;
 		spin_unlock_irq(&ifs->state_lock);
 	}
 }
 
+/*
+ * This ends IO if no bytes were submitted to an IO helper.
+ *
+ * If all bytes were submitted to the IO helper then the IO helper is
+ * responsible for ending IO.
+ *
+ * Otherwise, this calibrates ifs->read_bytes_pending to represent only the
+ * submitted bytes (see comment in iomap_read_init()). If the IO helper has
+ * already finished reading in all the submitted bytes, then this will end IO.
+ */
 static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
 {
 	struct iomap_folio_state *ifs;
@@ -381,9 +406,21 @@ static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
 	ifs = folio->private;
 	if (ifs) {
 		bool end_read, uptodate;
+		/*
+		 * Subtract any bytes that were initially accounted to
+		 * read_bytes_pending but skipped for IO.
+		 */
 		size_t bytes_not_submitted = folio_size(folio) -
 				bytes_submitted;
 
+		/*
+		 * If all bytes were submitted for IO then we're done, as the
+		 * IO helper will handle everything in their call(s) to
+		 * iomap_finish_folio_read().
+		 */
+		if (!bytes_not_submitted)
+			return;
+
 		spin_lock_irq(&ifs->state_lock);
 		ifs->read_bytes_pending -= bytes_not_submitted;
 		/*
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-10-27 18:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-27 18:12 [PATCH v2 0/2] vfs-6.19.iomap commit 51311f045375 fixups Joanne Koong
2025-10-27 18:12 ` [PATCH v2 1/2] iomap: rename bytes_pending/bytes_accounted to bytes_submitted/bytes_not_submitted Joanne Koong
2025-10-27 18:12 ` [PATCH v2 2/2] iomap: fix race when reading in all bytes of a folio Joanne Koong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).