From: Zhang Yi <yi.zhang@huaweicloud.com>
To: linux-ext4@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
tytso@mit.edu, adilger.kernel@dilger.ca, jack@suse.cz,
ojaswin@linux.ibm.com, sashal@kernel.org, yi.zhang@huawei.com,
yi.zhang@huaweicloud.com, libaokun1@huawei.com,
yukuai3@huawei.com, yangerkun@huawei.com
Subject: [PATCH v3 01/10] ext4: process folios writeback in bytes
Date: Tue, 1 Jul 2025 21:06:26 +0800 [thread overview]
Message-ID: <20250701130635.4079595-2-yi.zhang@huaweicloud.com> (raw)
In-Reply-To: <20250701130635.4079595-1-yi.zhang@huaweicloud.com>
From: Zhang Yi <yi.zhang@huawei.com>
Since ext4 supports large folios, processing writeback in units of pages
is no longer appropriate. Change the writeback path to track and process
folios in units of bytes instead.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
fs/ext4/inode.c | 70 +++++++++++++++++++------------------
include/trace/events/ext4.h | 13 ++++---
2 files changed, 42 insertions(+), 41 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be9a4cba35fd..ba81df0d87dd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1667,11 +1667,12 @@ struct mpage_da_data {
unsigned int can_map:1; /* Can writepages call map blocks? */
/* These are internal state of ext4_do_writepages() */
- pgoff_t first_page; /* The first page to write */
- pgoff_t next_page; /* Current page to examine */
- pgoff_t last_page; /* Last page to examine */
+ loff_t start_pos; /* The start pos to write */
+ loff_t next_pos; /* Current pos to examine */
+ loff_t end_pos; /* Last pos to examine */
+
/*
- * Extent to map - this can be after first_page because that can be
+ * Extent to map - this can be after start_pos because that can be
* fully mapped. We somewhat abuse m_flags to store whether the extent
* is delalloc or unwritten.
*/
@@ -1691,38 +1692,38 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
- /* This is necessary when next_page == 0. */
- if (mpd->first_page >= mpd->next_page)
+ /* This is necessary when next_pos == 0. */
+ if (mpd->start_pos >= mpd->next_pos)
return;
mpd->scanned_until_end = 0;
- index = mpd->first_page;
- end = mpd->next_page - 1;
if (invalidate) {
ext4_lblk_t start, last;
- start = index << (PAGE_SHIFT - inode->i_blkbits);
- last = end << (PAGE_SHIFT - inode->i_blkbits);
+ start = EXT4_B_TO_LBLK(inode, mpd->start_pos);
+ last = mpd->next_pos >> inode->i_blkbits;
/*
* avoid racing with extent status tree scans made by
* ext4_insert_delayed_block()
*/
down_write(&EXT4_I(inode)->i_data_sem);
- ext4_es_remove_extent(inode, start, last - start + 1);
+ ext4_es_remove_extent(inode, start, last - start);
up_write(&EXT4_I(inode)->i_data_sem);
}
folio_batch_init(&fbatch);
- while (index <= end) {
- nr = filemap_get_folios(mapping, &index, end, &fbatch);
+ index = mpd->start_pos >> PAGE_SHIFT;
+ end = mpd->next_pos >> PAGE_SHIFT;
+ while (index < end) {
+ nr = filemap_get_folios(mapping, &index, end - 1, &fbatch);
if (nr == 0)
break;
for (i = 0; i < nr; i++) {
struct folio *folio = fbatch.folios[i];
- if (folio->index < mpd->first_page)
+ if (folio_pos(folio) < mpd->start_pos)
continue;
- if (folio_next_index(folio) - 1 > end)
+ if (folio_next_index(folio) > end)
continue;
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
@@ -2024,7 +2025,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
static void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio)
{
- mpd->first_page += folio_nr_pages(folio);
+ mpd->start_pos += folio_size(folio);
folio_unlock(folio);
}
@@ -2034,7 +2035,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
loff_t size;
int err;
- BUG_ON(folio->index != mpd->first_page);
+ WARN_ON_ONCE(folio_pos(folio) != mpd->start_pos);
folio_clear_dirty_for_io(folio);
/*
* We have to be very careful here! Nothing protects writeback path
@@ -2446,7 +2447,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
* Update on-disk size after IO is submitted. Races with
* truncate are avoided by checking i_size under i_data_sem.
*/
- disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
+ disksize = mpd->start_pos;
if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
int err2;
loff_t i_size;
@@ -2549,8 +2550,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
struct address_space *mapping = mpd->inode->i_mapping;
struct folio_batch fbatch;
unsigned int nr_folios;
- pgoff_t index = mpd->first_page;
- pgoff_t end = mpd->last_page;
+ pgoff_t index = mpd->start_pos >> PAGE_SHIFT;
+ pgoff_t end = mpd->end_pos >> PAGE_SHIFT;
xa_mark_t tag;
int i, err = 0;
int blkbits = mpd->inode->i_blkbits;
@@ -2565,7 +2566,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
tag = PAGECACHE_TAG_DIRTY;
mpd->map.m_len = 0;
- mpd->next_page = index;
+ mpd->next_pos = mpd->start_pos;
if (ext4_should_journal_data(mpd->inode)) {
handle = ext4_journal_start(mpd->inode, EXT4_HT_WRITE_PAGE,
bpp);
@@ -2596,7 +2597,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
goto out;
/* If we can't merge this page, we are done. */
- if (mpd->map.m_len > 0 && mpd->next_page != folio->index)
+ if (mpd->map.m_len > 0 &&
+ mpd->next_pos != folio_pos(folio))
goto out;
if (handle) {
@@ -2642,8 +2644,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
}
if (mpd->map.m_len == 0)
- mpd->first_page = folio->index;
- mpd->next_page = folio_next_index(folio);
+ mpd->start_pos = folio_pos(folio);
+ mpd->next_pos = folio_pos(folio) + folio_size(folio);
/*
* Writeout when we cannot modify metadata is simple.
* Just submit the page. For data=journal mode we
@@ -2786,18 +2788,18 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
writeback_index = mapping->writeback_index;
if (writeback_index)
cycled = 0;
- mpd->first_page = writeback_index;
- mpd->last_page = -1;
+ mpd->start_pos = writeback_index << PAGE_SHIFT;
+ mpd->end_pos = -1;
} else {
- mpd->first_page = wbc->range_start >> PAGE_SHIFT;
- mpd->last_page = wbc->range_end >> PAGE_SHIFT;
+ mpd->start_pos = wbc->range_start;
+ mpd->end_pos = wbc->range_end;
}
ext4_io_submit_init(&mpd->io_submit, wbc);
retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag_pages_for_writeback(mapping, mpd->first_page,
- mpd->last_page);
+ tag_pages_for_writeback(mapping, mpd->start_pos >> PAGE_SHIFT,
+ mpd->end_pos >> PAGE_SHIFT);
blk_start_plug(&plug);
/*
@@ -2857,7 +2859,7 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
}
mpd->do_map = 1;
- trace_ext4_da_write_pages(inode, mpd->first_page, wbc);
+ trace_ext4_da_write_pages(inode, mpd->start_pos, wbc);
ret = mpage_prepare_extent_to_map(mpd);
if (!ret && mpd->map.m_len)
ret = mpage_map_and_submit_extent(handle, mpd,
@@ -2914,8 +2916,8 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
blk_finish_plug(&plug);
if (!ret && !cycled && wbc->nr_to_write > 0) {
cycled = 1;
- mpd->last_page = writeback_index - 1;
- mpd->first_page = 0;
+ mpd->end_pos = (writeback_index << PAGE_SHIFT) - 1;
+ mpd->start_pos = 0;
goto retry;
}
@@ -2925,7 +2927,7 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
* Set the writeback_index so that range_cyclic
* mode will write it back later
*/
- mapping->writeback_index = mpd->first_page;
+ mapping->writeback_index = mpd->start_pos >> PAGE_SHIFT;
out_writepages:
trace_ext4_writepages_result(inode, wbc, ret,
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 156908641e68..62d52997b5c6 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -483,15 +483,15 @@ TRACE_EVENT(ext4_writepages,
);
TRACE_EVENT(ext4_da_write_pages,
- TP_PROTO(struct inode *inode, pgoff_t first_page,
+ TP_PROTO(struct inode *inode, loff_t start_pos,
struct writeback_control *wbc),
- TP_ARGS(inode, first_page, wbc),
+ TP_ARGS(inode, start_pos, wbc),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
- __field( pgoff_t, first_page )
+ __field( loff_t, start_pos )
__field( long, nr_to_write )
__field( int, sync_mode )
),
@@ -499,15 +499,14 @@ TRACE_EVENT(ext4_da_write_pages,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->first_page = first_page;
+ __entry->start_pos = start_pos;
__entry->nr_to_write = wbc->nr_to_write;
__entry->sync_mode = wbc->sync_mode;
),
- TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld "
- "sync_mode %d",
+ TP_printk("dev %d,%d ino %lu start_pos 0x%llx nr_to_write %ld sync_mode %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
- (unsigned long) __entry->ino, __entry->first_page,
+ (unsigned long) __entry->ino, __entry->start_pos,
__entry->nr_to_write, __entry->sync_mode)
);
--
2.46.1
next prev parent reply other threads:[~2025-07-01 13:20 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-01 13:06 [PATCH v3 00/10] ext4: fix insufficient credits when writing back large folios Zhang Yi
2025-07-01 13:06 ` Zhang Yi [this message]
2025-07-02 14:00 ` [PATCH v3 01/10] ext4: process folios writeback in bytes Jan Kara
2025-07-03 2:03 ` Zhang Yi
2025-07-01 13:06 ` [PATCH v3 02/10] ext4: move the calculation of wbc->nr_to_write to mpage_folio_done() Zhang Yi
2025-07-01 13:06 ` [PATCH v3 03/10] ext4: fix stale data if it bail out of the extents mapping loop Zhang Yi
2025-07-02 14:07 ` Jan Kara
2025-07-03 2:05 ` Zhang Yi
2025-07-01 13:06 ` [PATCH v3 04/10] ext4: refactor the block allocation process of ext4_page_mkwrite() Zhang Yi
2025-07-02 14:10 ` Jan Kara
2025-07-01 13:06 ` [PATCH v3 05/10] ext4: restart handle if credits are insufficient during allocating blocks Zhang Yi
2025-07-02 14:18 ` Jan Kara
2025-07-03 2:13 ` Zhang Yi
2025-07-03 16:27 ` Jan Kara
2025-07-04 1:40 ` Zhang Yi
2025-07-04 8:18 ` Jan Kara
2025-07-04 8:47 ` Zhang Yi
2025-07-01 13:06 ` [PATCH v3 06/10] ext4: enhance tracepoints during the folios writeback Zhang Yi
2025-07-02 14:20 ` Jan Kara
2025-07-01 13:06 ` [PATCH v3 07/10] ext4: correct the reserved credits for extent conversion Zhang Yi
2025-07-01 13:06 ` [PATCH v3 08/10] ext4: reserved credits for one extent during the folio writeback Zhang Yi
2025-07-02 14:34 ` Jan Kara
2025-07-01 13:06 ` [PATCH v3 09/10] ext4: replace ext4_writepage_trans_blocks() Zhang Yi
2025-07-02 14:50 ` Jan Kara
2025-07-01 13:06 ` [PATCH v3 10/10] ext4: fix insufficient credits calculation in ext4_meta_trans_blocks() Zhang Yi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250701130635.4079595-2-yi.zhang@huaweicloud.com \
--to=yi.zhang@huaweicloud.com \
--cc=adilger.kernel@dilger.ca \
--cc=jack@suse.cz \
--cc=libaokun1@huawei.com \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=ojaswin@linux.ibm.com \
--cc=sashal@kernel.org \
--cc=tytso@mit.edu \
--cc=yangerkun@huawei.com \
--cc=yi.zhang@huawei.com \
--cc=yukuai3@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the NNTP newsgroup(s).