linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages()
@ 2024-07-18 22:29 Matthew Wilcox (Oracle)
  2024-07-18 22:30 ` [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate() Matthew Wilcox (Oracle)
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-07-18 22:29 UTC (permalink / raw)
  To: Theodore Ts'o, Andreas Dilger
  Cc: Matthew Wilcox (Oracle), linux-ext4, linux-fsdevel,
	Hannes Reinecke

This function is very similar to do_mpage_readpage() and a similar
approach to that taken in commit 12ac5a65cb56 will work.  As in
do_mpage_readpage(), we only use this array for checking block contiguity
and we can do that more efficiently with a little arithmetic.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/ext4/readpage.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 8494492582ab..5d3a9dc9a32d 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -221,7 +221,7 @@ int ext4_mpage_readpages(struct inode *inode,
 	sector_t block_in_file;
 	sector_t last_block;
 	sector_t last_block_in_file;
-	sector_t blocks[MAX_BUF_PER_PAGE];
+	sector_t first_block;
 	unsigned page_block;
 	struct block_device *bdev = inode->i_sb->s_bdev;
 	int length;
@@ -263,6 +263,7 @@ int ext4_mpage_readpages(struct inode *inode,
 			unsigned map_offset = block_in_file - map.m_lblk;
 			unsigned last = map.m_len - map_offset;
 
+			first_block = map.m_pblk + map_offset;
 			for (relative_block = 0; ; relative_block++) {
 				if (relative_block == last) {
 					/* needed? */
@@ -271,8 +272,6 @@ int ext4_mpage_readpages(struct inode *inode,
 				}
 				if (page_block == blocks_per_page)
 					break;
-				blocks[page_block] = map.m_pblk + map_offset +
-					relative_block;
 				page_block++;
 				block_in_file++;
 			}
@@ -307,7 +306,9 @@ int ext4_mpage_readpages(struct inode *inode,
 				goto confused;		/* hole -> non-hole */
 
 			/* Contiguous blocks? */
-			if (page_block && blocks[page_block-1] != map.m_pblk-1)
+			if (!page_block)
+				first_block = map.m_pblk;
+			else if (first_block + page_block != map.m_pblk)
 				goto confused;
 			for (relative_block = 0; ; relative_block++) {
 				if (relative_block == map.m_len) {
@@ -316,7 +317,6 @@ int ext4_mpage_readpages(struct inode *inode,
 					break;
 				} else if (page_block == blocks_per_page)
 					break;
-				blocks[page_block] = map.m_pblk+relative_block;
 				page_block++;
 				block_in_file++;
 			}
@@ -339,7 +339,7 @@ int ext4_mpage_readpages(struct inode *inode,
 		 * This folio will go to BIO.  Do we need to send this
 		 * BIO off first?
 		 */
-		if (bio && (last_block_in_bio != blocks[0] - 1 ||
+		if (bio && (last_block_in_bio != first_block - 1 ||
 			    !fscrypt_mergeable_bio(bio, inode, next_block))) {
 		submit_and_realloc:
 			submit_bio(bio);
@@ -355,7 +355,7 @@ int ext4_mpage_readpages(struct inode *inode,
 			fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
 						  GFP_KERNEL);
 			ext4_set_bio_post_read_ctx(bio, inode, folio->index);
-			bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
+			bio->bi_iter.bi_sector = first_block << (blkbits - 9);
 			bio->bi_end_io = mpage_end_io;
 			if (rac)
 				bio->bi_opf |= REQ_RAHEAD;
@@ -371,7 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
 			submit_bio(bio);
 			bio = NULL;
 		} else
-			last_block_in_bio = blocks[blocks_per_page - 1];
+			last_block_in_bio = first_block + blocks_per_page - 1;
 		continue;
 	confused:
 		if (bio) {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate()
  2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
@ 2024-07-18 22:30 ` Matthew Wilcox (Oracle)
  2024-07-18 22:30 ` [PATCH v2 3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate() Matthew Wilcox (Oracle)
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-07-18 22:30 UTC (permalink / raw)
  To: Theodore Ts'o, Andreas Dilger
  Cc: Matthew Wilcox (Oracle), linux-ext4, linux-fsdevel,
	Hannes Reinecke

Instead of synchronously reading one buffer at a time, submit reads
as we walk the buffers in the first loop, then wait for them in the
second loop.  This should be significantly more efficient, particularly
on HDDs, but I have not measured.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/ext4/move_extent.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 204f53b23622..6d651ad788ac 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -174,7 +174,9 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
 	sector_t block;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
 	unsigned int blocksize, block_start, block_end;
-	int i, err,  nr = 0, partial = 0;
+	int i, nr = 0;
+	bool partial = false;
+
 	BUG_ON(!folio_test_locked(folio));
 	BUG_ON(folio_test_writeback(folio));
 
@@ -192,13 +194,13 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
 			if (!buffer_uptodate(bh))
-				partial = 1;
+				partial = true;
 			continue;
 		}
 		if (buffer_uptodate(bh))
 			continue;
 		if (!buffer_mapped(bh)) {
-			err = ext4_get_block(inode, block, bh, 0);
+			int err = ext4_get_block(inode, block, bh, 0);
 			if (err)
 				return err;
 			if (!buffer_mapped(bh)) {
@@ -207,6 +209,12 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
 				continue;
 			}
 		}
+		lock_buffer(bh);
+		if (buffer_uptodate(bh)) {
+			unlock_buffer(bh);
+			continue;
+		}
+		ext4_read_bh_nowait(bh, 0, NULL);
 		BUG_ON(nr >= MAX_BUF_PER_PAGE);
 		arr[nr++] = bh;
 	}
@@ -216,11 +224,10 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
 
 	for (i = 0; i < nr; i++) {
 		bh = arr[i];
-		if (!bh_uptodate_or_lock(bh)) {
-			err = ext4_read_bh(bh, 0, NULL);
-			if (err)
-				return err;
-		}
+		wait_on_buffer(bh);
+		if (buffer_uptodate(bh))
+			continue;
+		return -EIO;
 	}
 out:
 	if (!partial)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate()
  2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
  2024-07-18 22:30 ` [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate() Matthew Wilcox (Oracle)
@ 2024-07-18 22:30 ` Matthew Wilcox (Oracle)
  2024-07-18 22:30 ` [PATCH v2 4/4] ext4: Tidy the BH loop in mext_page_mkuptodate() Matthew Wilcox (Oracle)
  2024-08-27 12:47 ` [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Theodore Ts'o
  3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-07-18 22:30 UTC (permalink / raw)
  To: Theodore Ts'o, Andreas Dilger
  Cc: Matthew Wilcox (Oracle), linux-ext4, linux-fsdevel,
	Hannes Reinecke

Iterate the folio's list of buffer_heads twice instead of keeping
an array of pointers.  This solves a too-large-array-for-stack problem
on architectures with a ridiculously large PAGE_SIZE and prepares
ext4 to support larger folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/ext4/move_extent.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 6d651ad788ac..660bf34a5c4b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -166,15 +166,14 @@ mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
 	return 0;
 }
 
-/* Force page buffers uptodate w/o dropping page's lock */
-static int
-mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
+/* Force folio buffers uptodate w/o dropping folio's lock */
+static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
 {
 	struct inode *inode = folio->mapping->host;
 	sector_t block;
-	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	struct buffer_head *bh, *head;
 	unsigned int blocksize, block_start, block_end;
-	int i, nr = 0;
+	int nr = 0;
 	bool partial = false;
 
 	BUG_ON(!folio_test_locked(folio));
@@ -215,20 +214,23 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
 			continue;
 		}
 		ext4_read_bh_nowait(bh, 0, NULL);
-		BUG_ON(nr >= MAX_BUF_PER_PAGE);
-		arr[nr++] = bh;
+		nr++;
 	}
 	/* No io required */
 	if (!nr)
 		goto out;
 
-	for (i = 0; i < nr; i++) {
-		bh = arr[i];
+	bh = head;
+	do {
+		if (bh_offset(bh) + blocksize <= from)
+			continue;
+		if (bh_offset(bh) > to)
+			break;
 		wait_on_buffer(bh);
 		if (buffer_uptodate(bh))
 			continue;
 		return -EIO;
-	}
+	} while ((bh = bh->b_this_page) != head);
 out:
 	if (!partial)
 		folio_mark_uptodate(folio);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 4/4] ext4: Tidy the BH loop in mext_page_mkuptodate()
  2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
  2024-07-18 22:30 ` [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate() Matthew Wilcox (Oracle)
  2024-07-18 22:30 ` [PATCH v2 3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate() Matthew Wilcox (Oracle)
@ 2024-07-18 22:30 ` Matthew Wilcox (Oracle)
  2024-08-27 12:47 ` [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Theodore Ts'o
  3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-07-18 22:30 UTC (permalink / raw)
  To: Theodore Ts'o, Andreas Dilger
  Cc: Matthew Wilcox (Oracle), linux-ext4, linux-fsdevel,
	Hannes Reinecke

This for loop is somewhat hard to read; turn it into a normal BH
do-while loop.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/ext4/move_extent.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 660bf34a5c4b..516897b0218e 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -187,9 +187,11 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
 	if (!head)
 		head = create_empty_buffers(folio, blocksize, 0);
 
-	block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits);
-	for (bh = head, block_start = 0; bh != head || !block_start;
-	     block++, block_start = block_end, bh = bh->b_this_page) {
+	block = folio_pos(folio) >> inode->i_blkbits;
+	block_end = 0;
+	bh = head;
+	do {
+		block_start = block_end;
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
 			if (!buffer_uptodate(bh))
@@ -215,7 +217,8 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
 		}
 		ext4_read_bh_nowait(bh, 0, NULL);
 		nr++;
-	}
+	} while (block++, (bh = bh->b_this_page) != head);
+
 	/* No io required */
 	if (!nr)
 		goto out;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages()
  2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
                   ` (2 preceding siblings ...)
  2024-07-18 22:30 ` [PATCH v2 4/4] ext4: Tidy the BH loop in mext_page_mkuptodate() Matthew Wilcox (Oracle)
@ 2024-08-27 12:47 ` Theodore Ts'o
  3 siblings, 0 replies; 5+ messages in thread
From: Theodore Ts'o @ 2024-08-27 12:47 UTC (permalink / raw)
  To: Andreas Dilger, Matthew Wilcox (Oracle)
  Cc: Theodore Ts'o, linux-ext4, linux-fsdevel, Hannes Reinecke


On Thu, 18 Jul 2024 23:29:59 +0100, Matthew Wilcox (Oracle) wrote:
> This function is very similar to do_mpage_readpage() and a similar
> approach to that taken in commit 12ac5a65cb56 will work.  As in
> do_mpage_readpage(), we only use this array for checking block contiguity
> and we can do that more efficiently with a little arithmetic.
> 
> 

Applied, thanks!

[1/4] ext4: Reduce stack usage in ext4_mpage_readpages()
      commit: e37c9e173bff50a2d57dfecdd694457c00ce5a8c
[2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate()
      commit: 368a83cebbb949adbcc20877c35367178497d9cc
[3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate()
      commit: a40759fb16ae839f8c769174fde017564ea564ff
[4/4] ext4: Tidy the BH loop in mext_page_mkuptodate()
      commit: 3e3a693551c3e9b45575e94ca2d1d670a47b3fcc

Best regards,
-- 
Theodore Ts'o <tytso@mit.edu>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-08-27 12:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 4/4] ext4: Tidy the BH loop in mext_page_mkuptodate() Matthew Wilcox (Oracle)
2024-08-27 12:47 ` [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Theodore Ts'o

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).