* [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages()
@ 2024-07-18 22:29 Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate() Matthew Wilcox (Oracle)
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-07-18 22:29 UTC (permalink / raw)
To: Theodore Ts'o, Andreas Dilger
Cc: Matthew Wilcox (Oracle), linux-ext4, linux-fsdevel,
Hannes Reinecke
This function is very similar to do_mpage_readpage() and a similar
approach to that taken in commit 12ac5a65cb56 will work. As in
do_mpage_readpage(), we only use this array for checking block contiguity
and we can do that more efficiently with a little arithmetic.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ext4/readpage.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 8494492582ab..5d3a9dc9a32d 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -221,7 +221,7 @@ int ext4_mpage_readpages(struct inode *inode,
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
- sector_t blocks[MAX_BUF_PER_PAGE];
+ sector_t first_block;
unsigned page_block;
struct block_device *bdev = inode->i_sb->s_bdev;
int length;
@@ -263,6 +263,7 @@ int ext4_mpage_readpages(struct inode *inode,
unsigned map_offset = block_in_file - map.m_lblk;
unsigned last = map.m_len - map_offset;
+ first_block = map.m_pblk + map_offset;
for (relative_block = 0; ; relative_block++) {
if (relative_block == last) {
/* needed? */
@@ -271,8 +272,6 @@ int ext4_mpage_readpages(struct inode *inode,
}
if (page_block == blocks_per_page)
break;
- blocks[page_block] = map.m_pblk + map_offset +
- relative_block;
page_block++;
block_in_file++;
}
@@ -307,7 +306,9 @@ int ext4_mpage_readpages(struct inode *inode,
goto confused; /* hole -> non-hole */
/* Contiguous blocks? */
- if (page_block && blocks[page_block-1] != map.m_pblk-1)
+ if (!page_block)
+ first_block = map.m_pblk;
+ else if (first_block + page_block != map.m_pblk)
goto confused;
for (relative_block = 0; ; relative_block++) {
if (relative_block == map.m_len) {
@@ -316,7 +317,6 @@ int ext4_mpage_readpages(struct inode *inode,
break;
} else if (page_block == blocks_per_page)
break;
- blocks[page_block] = map.m_pblk+relative_block;
page_block++;
block_in_file++;
}
@@ -339,7 +339,7 @@ int ext4_mpage_readpages(struct inode *inode,
* This folio will go to BIO. Do we need to send this
* BIO off first?
*/
- if (bio && (last_block_in_bio != blocks[0] - 1 ||
+ if (bio && (last_block_in_bio != first_block - 1 ||
!fscrypt_mergeable_bio(bio, inode, next_block))) {
submit_and_realloc:
submit_bio(bio);
@@ -355,7 +355,7 @@ int ext4_mpage_readpages(struct inode *inode,
fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
GFP_KERNEL);
ext4_set_bio_post_read_ctx(bio, inode, folio->index);
- bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
+ bio->bi_iter.bi_sector = first_block << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
if (rac)
bio->bi_opf |= REQ_RAHEAD;
@@ -371,7 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
submit_bio(bio);
bio = NULL;
} else
- last_block_in_bio = blocks[blocks_per_page - 1];
+ last_block_in_bio = first_block + blocks_per_page - 1;
continue;
confused:
if (bio) {
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate()
2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
@ 2024-07-18 22:30 ` Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate() Matthew Wilcox (Oracle)
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-07-18 22:30 UTC (permalink / raw)
To: Theodore Ts'o, Andreas Dilger
Cc: Matthew Wilcox (Oracle), linux-ext4, linux-fsdevel,
Hannes Reinecke
Instead of synchronously reading one buffer at a time, submit reads
as we walk the buffers in the first loop, then wait for them in the
second loop. This should be significantly more efficient, particularly
on HDDs, but I have not measured it.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ext4/move_extent.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 204f53b23622..6d651ad788ac 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -174,7 +174,9 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
sector_t block;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize, block_start, block_end;
- int i, err, nr = 0, partial = 0;
+ int i, nr = 0;
+ bool partial = false;
+
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
@@ -192,13 +194,13 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
- partial = 1;
+ partial = true;
continue;
}
if (buffer_uptodate(bh))
continue;
if (!buffer_mapped(bh)) {
- err = ext4_get_block(inode, block, bh, 0);
+ int err = ext4_get_block(inode, block, bh, 0);
if (err)
return err;
if (!buffer_mapped(bh)) {
@@ -207,6 +209,12 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
continue;
}
}
+ lock_buffer(bh);
+ if (buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ continue;
+ }
+ ext4_read_bh_nowait(bh, 0, NULL);
BUG_ON(nr >= MAX_BUF_PER_PAGE);
arr[nr++] = bh;
}
@@ -216,11 +224,10 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
for (i = 0; i < nr; i++) {
bh = arr[i];
- if (!bh_uptodate_or_lock(bh)) {
- err = ext4_read_bh(bh, 0, NULL);
- if (err)
- return err;
- }
+ wait_on_buffer(bh);
+ if (buffer_uptodate(bh))
+ continue;
+ return -EIO;
}
out:
if (!partial)
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate()
2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate() Matthew Wilcox (Oracle)
@ 2024-07-18 22:30 ` Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 4/4] ext4: Tidy the BH loop in mext_page_mkuptodate() Matthew Wilcox (Oracle)
2024-08-27 12:47 ` [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Theodore Ts'o
3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-07-18 22:30 UTC (permalink / raw)
To: Theodore Ts'o, Andreas Dilger
Cc: Matthew Wilcox (Oracle), linux-ext4, linux-fsdevel,
Hannes Reinecke
Iterate the folio's list of buffer_heads twice instead of keeping
an array of pointers. This solves a too-large-array-for-stack problem
on architectures with a ridiculously large PAGE_SIZE and prepares
ext4 to support larger folios.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ext4/move_extent.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 6d651ad788ac..660bf34a5c4b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -166,15 +166,14 @@ mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
return 0;
}
-/* Force page buffers uptodate w/o dropping page's lock */
-static int
-mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
+/* Force folio buffers uptodate w/o dropping folio's lock */
+static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
{
struct inode *inode = folio->mapping->host;
sector_t block;
- struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+ struct buffer_head *bh, *head;
unsigned int blocksize, block_start, block_end;
- int i, nr = 0;
+ int nr = 0;
bool partial = false;
BUG_ON(!folio_test_locked(folio));
@@ -215,20 +214,23 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
continue;
}
ext4_read_bh_nowait(bh, 0, NULL);
- BUG_ON(nr >= MAX_BUF_PER_PAGE);
- arr[nr++] = bh;
+ nr++;
}
/* No io required */
if (!nr)
goto out;
- for (i = 0; i < nr; i++) {
- bh = arr[i];
+ bh = head;
+ do {
+ if (bh_offset(bh) + blocksize <= from)
+ continue;
+ if (bh_offset(bh) > to)
+ break;
wait_on_buffer(bh);
if (buffer_uptodate(bh))
continue;
return -EIO;
- }
+ } while ((bh = bh->b_this_page) != head);
out:
if (!partial)
folio_mark_uptodate(folio);
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 4/4] ext4: Tidy the BH loop in mext_page_mkuptodate()
2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate() Matthew Wilcox (Oracle)
@ 2024-07-18 22:30 ` Matthew Wilcox (Oracle)
2024-08-27 12:47 ` [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Theodore Ts'o
3 siblings, 0 replies; 5+ messages in thread
From: Matthew Wilcox (Oracle) @ 2024-07-18 22:30 UTC (permalink / raw)
To: Theodore Ts'o, Andreas Dilger
Cc: Matthew Wilcox (Oracle), linux-ext4, linux-fsdevel,
Hannes Reinecke
This for loop is somewhat hard to read; turn it into a normal BH
do-while loop.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/ext4/move_extent.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 660bf34a5c4b..516897b0218e 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -187,9 +187,11 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
if (!head)
head = create_empty_buffers(folio, blocksize, 0);
- block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits);
- for (bh = head, block_start = 0; bh != head || !block_start;
- block++, block_start = block_end, bh = bh->b_this_page) {
+ block = folio_pos(folio) >> inode->i_blkbits;
+ block_end = 0;
+ bh = head;
+ do {
+ block_start = block_end;
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
@@ -215,7 +217,8 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
}
ext4_read_bh_nowait(bh, 0, NULL);
nr++;
- }
+ } while (block++, (bh = bh->b_this_page) != head);
+
/* No io required */
if (!nr)
goto out;
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages()
2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
` (2 preceding siblings ...)
2024-07-18 22:30 ` [PATCH v2 4/4] ext4: Tidy the BH loop in mext_page_mkuptodate() Matthew Wilcox (Oracle)
@ 2024-08-27 12:47 ` Theodore Ts'o
3 siblings, 0 replies; 5+ messages in thread
From: Theodore Ts'o @ 2024-08-27 12:47 UTC (permalink / raw)
To: Andreas Dilger, Matthew Wilcox (Oracle)
Cc: Theodore Ts'o, linux-ext4, linux-fsdevel, Hannes Reinecke
On Thu, 18 Jul 2024 23:29:59 +0100, Matthew Wilcox (Oracle) wrote:
> This function is very similar to do_mpage_readpage() and a similar
> approach to that taken in commit 12ac5a65cb56 will work. As in
> do_mpage_readpage(), we only use this array for checking block contiguity
> and we can do that more efficiently with a little arithmetic.
>
>
Applied, thanks!
[1/4] ext4: Reduce stack usage in ext4_mpage_readpages()
commit: e37c9e173bff50a2d57dfecdd694457c00ce5a8c
[2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate()
commit: 368a83cebbb949adbcc20877c35367178497d9cc
[3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate()
commit: a40759fb16ae839f8c769174fde017564ea564ff
[4/4] ext4: Tidy the BH loop in mext_page_mkuptodate()
commit: 3e3a693551c3e9b45575e94ca2d1d670a47b3fcc
Best regards,
--
Theodore Ts'o <tytso@mit.edu>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-08-27 12:47 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-18 22:29 [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 2/4] ext4: Pipeline buffer reads in mext_page_mkuptodate() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 3/4] ext4: Remove array of buffer_heads from mext_page_mkuptodate() Matthew Wilcox (Oracle)
2024-07-18 22:30 ` [PATCH v2 4/4] ext4: Tidy the BH loop in mext_page_mkuptodate() Matthew Wilcox (Oracle)
2024-08-27 12:47 ` [PATCH v2 1/4] ext4: Reduce stack usage in ext4_mpage_readpages() Theodore Ts'o
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).