[PATCH 4/5] fs/buffer: add iteration support for block_read_full_folio()

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Luis Chamberlain <mcgrof@kernel.org>
To: hare@suse.de, willy@infradead.org, dave@stgolabs.net,
	david@fromorbit.com, djwong@kernel.org, kbusch@kernel.org
Cc: john.g.garry@oracle.com, hch@lst.de, ritesh.list@gmail.com,
	linux-fsdevel@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-mm@kvack.org, linux-block@vger.kernel.org,
	gost.dev@samsung.com, p.raghav@samsung.com, da.gomez@samsung.com,
	kernel@pankajraghav.com, mcgrof@kernel.org
Subject: [PATCH 4/5] fs/buffer: add iteration support for block_read_full_folio()
Date: Tue, 17 Dec 2024 18:26:25 -0800	[thread overview]
Message-ID: <20241218022626.3668119-5-mcgrof@kernel.org> (raw)
In-Reply-To: <20241218022626.3668119-1-mcgrof@kernel.org>

Provide a helper to iterate over the buffer heads of a folio. We do this
as a preliminary step to make the subsequent changes easier to read.
Right now we use an on-stack array of size MAX_BUF_PER_PAGE to loop over
all buffer heads in a folio; however, on CPUs where the system page size
is quite large, like Hexagon with 256 KiB page size support, this can
mean the kernel ends up spewing stack growth warnings.

To be able to break this down into smaller array chunks, add support for
processing a subset of a folio's buffer heads at a time. The array size
used is not changed yet; that will be done in a subsequent patch. This
patch just adds the iterator support and logic.

While at it, clarify the booleans used on bh_read_batch_async() and
how they are only meaningful once we've processed all buffer heads of
a folio, that is, when we're on the last buffer head in
a folio:

  * bh_folio_reads
  * unmapped

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
 fs/buffer.c | 134 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 97 insertions(+), 37 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 1aeef7dd2281..b8ba72f2f211 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2402,66 +2402,75 @@ static void bh_read_batch_async(struct folio *folio,
 #define bh_next(__bh, __head) \
     (bh_is_last(__bh, __head) ? NULL : (__bh)->b_this_page)
 
+/* Starts from a pivot which you initialize */
+#define for_each_bh_pivot(__pivot, __last, __head)	\
+    for ((__pivot) = __last = (__pivot);		\
+         (__pivot);					\
+         (__pivot) = bh_next(__pivot, __head),		\
+	 (__last) = (__pivot) ? (__pivot) : (__last))
+
 /* Starts from the provided head */
 #define for_each_bh(__tmp, __head)			\
     for ((__tmp) = (__head);				\
          (__tmp);					\
          (__tmp) = bh_next(__tmp, __head))
 
+struct bh_iter {
+	sector_t iblock;
+	get_block_t *get_block;
+	bool any_get_block_error;
+	int unmapped;
+	int bh_folio_reads;
+};
+
 /*
- * Generic "read_folio" function for block devices that have the normal
- * get_block functionality. This is most of the block device filesystems.
- * Reads the folio asynchronously --- the unlock_buffer() and
- * set/clear_buffer_uptodate() functions propagate buffer state into the
- * folio once IO has completed.
+ * Reads up to MAX_BUF_PER_PAGE buffer heads at a time on a folio on the given
+ * block range iblock to lblock and helps update the number of buffer-heads
+ * which were not uptodate or unmapped for which we issued an async read for
+ * on iter->bh_folio_reads for the full folio. Returns the last buffer-head we
+ * worked on.
  */
-int block_read_full_folio(struct folio *folio, get_block_t *get_block)
-{
-	struct inode *inode = folio->mapping->host;
-	sector_t iblock, lblock;
-	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-	size_t blocksize;
-	int nr;
-	int fully_mapped = 1;
-	bool page_error = false;
-	loff_t limit = i_size_read(inode);
-
-	/* This is needed for ext4. */
-	if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
-		limit = inode->i_sb->s_maxbytes;
+static struct buffer_head *bh_read_iter(struct folio *folio,
+					struct buffer_head *pivot,
+					struct buffer_head *head,
+					struct inode *inode,
+					struct bh_iter *iter, sector_t lblock)
+{
+	struct buffer_head *arr[MAX_BUF_PER_PAGE];
+	struct buffer_head *bh = pivot, *last;
+	int nr = 0, i = 0;
+	size_t blocksize = head->b_size;
+	bool no_reads = false;
+	bool fully_mapped = false;
 
-	VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
+	/* Stage one - collect buffer heads we need issue a read for */
 
-	head = folio_create_buffers(folio, inode, 0);
-	blocksize = head->b_size;
+	/* collect buffers not uptodate and not mapped yet */
+	for_each_bh_pivot(bh, last, head) {
+		BUG_ON(nr >= MAX_BUF_PER_PAGE);
 
-	iblock = div_u64(folio_pos(folio), blocksize);
-	lblock = div_u64(limit + blocksize - 1, blocksize);
-	nr = 0;
-
-	/* Stage one - collect buffer heads we need issue a read for */
-	for_each_bh(bh, head) {
 		if (buffer_uptodate(bh)) {
-			iblock++;
+			iter->iblock++;
 			continue;
 		}
 
 		if (!buffer_mapped(bh)) {
 			int err = 0;
 
-			fully_mapped = 0;
-			if (iblock < lblock) {
+			iter->unmapped++;
+			if (iter->iblock < lblock) {
 				WARN_ON(bh->b_size != blocksize);
-				err = get_block(inode, iblock, bh, 0);
+				err = iter->get_block(inode, iter->iblock,
+						      bh, 0);
 				if (err)
-					page_error = true;
+					iter->any_get_block_error = true;
 			}
 			if (!buffer_mapped(bh)) {
 				folio_zero_range(folio, bh_offset(bh),
 						blocksize);
 				if (!err)
 					set_buffer_uptodate(bh);
-				iblock++;
+				iter->iblock++;
 				continue;
 			}
 			/*
@@ -2469,15 +2478,66 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
 			 * synchronously
 			 */
 			if (buffer_uptodate(bh)) {
-				iblock++;
+				iter->iblock++;
 				continue;
 			}
 		}
 		arr[nr++] = bh;
-		iblock++;
+		iter->iblock++;
+	}
+
+	iter->bh_folio_reads += nr;
+
+	WARN_ON_ONCE(!bh_is_last(last, head));
+
+	if (bh_is_last(last, head)) {
+		if (!iter->bh_folio_reads)
+			no_reads = true;
+		if (!iter->unmapped)
+			fully_mapped = true;
 	}
 
-	bh_read_batch_async(folio, nr, arr, fully_mapped, nr == 0, page_error);
+	bh_read_batch_async(folio, nr, arr, fully_mapped, no_reads,
+			    iter->any_get_block_error);
+
+	return last;
+}
+
+/*
+ * Generic "read_folio" function for block devices that have the normal
+ * get_block functionality. This is most of the block device filesystems.
+ * Reads the folio asynchronously --- the unlock_buffer() and
+ * set/clear_buffer_uptodate() functions propagate buffer state into the
+ * folio once IO has completed.
+ */
+int block_read_full_folio(struct folio *folio, get_block_t *get_block)
+{
+	struct inode *inode = folio->mapping->host;
+	sector_t lblock;
+	size_t blocksize;
+	struct buffer_head *bh, *head;
+	struct bh_iter iter = {
+		.get_block = get_block,
+		.unmapped = 0,
+		.any_get_block_error = false,
+		.bh_folio_reads = 0,
+	};
+	loff_t limit = i_size_read(inode);
+
+	/* This is needed for ext4. */
+	if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
+		limit = inode->i_sb->s_maxbytes;
+
+	VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
+
+	head = folio_create_buffers(folio, inode, 0);
+	blocksize = head->b_size;
+
+	iter.iblock = div_u64(folio_pos(folio), blocksize);
+	lblock = div_u64(limit + blocksize - 1, blocksize);
+
+	for_each_bh(bh, head)
+		bh = bh_read_iter(folio, bh, head, inode, &iter, lblock);
 
 	return 0;
 }
-- 
2.43.0

next prev parent reply	other threads:[~2024-12-18  2:26 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-18  2:26 [PATCH 0/5] fs/buffer: strack reduction on async read Luis Chamberlain
2024-12-18  2:26 ` [PATCH 1/5] fs/buffer: move async batch read code into a helper Luis Chamberlain
2024-12-18  2:26 ` [PATCH 2/5] fs/buffer: simplify block_read_full_folio() with bh_offset() Luis Chamberlain
2024-12-18  2:26 ` [PATCH 3/5] fs/buffer: add a for_each_bh() for block_read_full_folio() Luis Chamberlain
2024-12-18 19:20   ` Matthew Wilcox
2024-12-18  2:26 ` Luis Chamberlain [this message]
2024-12-18  2:26 ` [PATCH 5/5] fs/buffer: reduce stack usage on bh_read_iter() Luis Chamberlain
2024-12-18  2:47   ` Luis Chamberlain
2024-12-18 20:05 ` [PATCH 0/5] fs/buffer: strack reduction on async read Matthew Wilcox
2024-12-19  2:27   ` Luis Chamberlain
2024-12-19  3:51     ` Matthew Wilcox
2024-12-30 17:30       ` Luis Chamberlain
2025-01-31 16:54       ` Luis Chamberlain
2025-01-31 22:01         ` Matthew Wilcox
2025-02-03 14:00           ` Luis Chamberlain
2024-12-19  6:28 ` Christoph Hellwig
2024-12-19 17:53   ` Luis Chamberlain

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:1aeef7dd228 dfblob:b8ba72f2f21 )
 OR (
bs:"[PATCH 4/5] fs/buffer: add iteration support for block_read_full_folio()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241218022626.3668119-5-mcgrof@kernel.org \
    --to=mcgrof@kernel.org \
    --cc=da.gomez@samsung.com \
    --cc=dave@stgolabs.net \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=gost.dev@samsung.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=john.g.garry@oracle.com \
    --cc=kbusch@kernel.org \
    --cc=kernel@pankajraghav.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=p.raghav@samsung.com \
    --cc=ritesh.list@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).