From: Joanne Koong <joannelkoong@gmail.com>
To: linux-mm@kvack.org, brauner@kernel.org
Cc: willy@infradead.org, jack@suse.cz, hch@infradead.org,
djwong@kernel.org, jlayton@kernel.org,
linux-fsdevel@vger.kernel.org, kernel-team@meta.com
Subject: [PATCH v2 12/12] iomap: add granular dirty and writeback accounting
Date: Fri, 29 Aug 2025 16:39:42 -0700
Message-ID: <20250829233942.3607248-13-joannelkoong@gmail.com>
In-Reply-To: <20250829233942.3607248-1-joannelkoong@gmail.com>
Add granular dirty and writeback accounting for large folios. These
stats are used by the mm layer for dirty balancing and throttling.
Accounting at page granularity, rather than charging the entire folio,
helps prevent over-aggressive balancing and throttling.
This commit affects four places in iomap:
a) filemap dirtying, which now calls filemap_dirty_folio_pages()
b) the writeback_iter() loop, which now sets the
wbc->no_stats_accounting bit and calls clear_dirty_for_io_stats()
c) starting writeback, which now calls __folio_start_writeback()
d) ending writeback, which now calls folio_end_writeback_pages()
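These page-count-aware helpers are introduced earlier in this series;
as their call sites in the diff below show, each takes the number of
pages to account rather than assuming the whole folio.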
This relies on the ifs->state dirty bitmap to track dirty blocks in the
folio and on deriving the page counts from those blocks. As such,
granular accounting can only be used on filesystems where the block
size >= PAGE_SIZE.
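As an illustrative example (block and page sizes chosen arbitrarily):
with 4K pages and a 16K block size, a folio with three dirty blocks is
accounted as 3 * (16K >> PAGE_SHIFT) = 12 dirty pages, rather than the
full folio_nr_pages() count.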
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
fs/iomap/buffered-io.c | 140 ++++++++++++++++++++++++++++++++++++++---
1 file changed, 132 insertions(+), 8 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 4f021dcaaffe..bf33a5361a39 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -20,6 +20,8 @@ struct iomap_folio_state {
spinlock_t state_lock;
unsigned int read_bytes_pending;
atomic_t write_bytes_pending;
+ /* number of pages currently being written back */
+ unsigned nr_pages_writeback;
/*
* Each block has two bits in this bitmap:
@@ -139,6 +141,29 @@ static unsigned ifs_next_clean_block(struct folio *folio,
blks + start_blk) - blks;
}
+static unsigned ifs_count_dirty_pages(struct folio *folio)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned block_size = i_blocksize(inode);
+ unsigned start_blk, end_blk;
+ unsigned blks, nblks = 0;
+
+ start_blk = 0;
+ blks = i_blocks_per_folio(inode, folio);
+ end_blk = (i_size_read(inode) - 1) >> inode->i_blkbits;
+ end_blk = min(end_blk, blks - 1);
+
+ while (start_blk <= end_blk) {
+ start_blk = ifs_next_dirty_block(folio, start_blk, end_blk);
+ if (start_blk > end_blk)
+ break;
+ nblks++;
+ start_blk++;
+ }
+
+ return nblks * (block_size >> PAGE_SHIFT);
+}
+
static unsigned ifs_find_dirty_range(struct folio *folio,
struct iomap_folio_state *ifs, u64 *range_start, u64 range_end)
{
@@ -220,6 +245,58 @@ static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len)
ifs_set_range_dirty(folio, ifs, off, len);
}
+static long iomap_get_range_newly_dirtied(struct folio *folio, loff_t pos,
+ unsigned len)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned block_size = i_blocksize(inode);
+ unsigned start_blk, end_blk;
+ unsigned nblks = 0;
+
+ start_blk = pos >> inode->i_blkbits;
+ end_blk = (pos + len - 1) >> inode->i_blkbits;
+ end_blk = min(end_blk, i_blocks_per_folio(inode, folio) - 1);
+
+ while (start_blk <= end_blk) {
+ /* count how many clean blocks there are */
+ start_blk = ifs_next_clean_block(folio, start_blk, end_blk);
+ if (start_blk > end_blk)
+ break;
+ nblks++;
+ start_blk++;
+ }
+
+ return nblks * (block_size >> PAGE_SHIFT);
+}
+
+static bool iomap_granular_dirty_pages(struct folio *folio)
+{
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (!ifs)
+ return false;
+
+ return i_blocksize(folio->mapping->host) >= PAGE_SIZE;
+}
+
+static bool iomap_dirty_folio_range(struct address_space *mapping,
+ struct folio *folio, loff_t pos, unsigned len)
+{
+ long nr_new_dirty_pages;
+
+ if (!iomap_granular_dirty_pages(folio)) {
+ iomap_set_range_dirty(folio, pos, len);
+ return filemap_dirty_folio(mapping, folio);
+ }
+
+ nr_new_dirty_pages = iomap_get_range_newly_dirtied(folio, pos, len);
+ if (!nr_new_dirty_pages)
+ return false;
+
+ iomap_set_range_dirty(folio, pos, len);
+ return filemap_dirty_folio_pages(mapping, folio, nr_new_dirty_pages);
+}
+
static struct iomap_folio_state *ifs_alloc(struct inode *inode,
struct folio *folio, unsigned int flags)
{
@@ -712,8 +789,7 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio)
size_t len = folio_size(folio);
ifs_alloc(inode, folio, 0);
- iomap_set_range_dirty(folio, 0, len);
- return filemap_dirty_folio(mapping, folio);
+ return iomap_dirty_folio_range(mapping, folio, 0, len);
}
EXPORT_SYMBOL_GPL(iomap_dirty_folio);
@@ -937,8 +1013,8 @@ static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
if (unlikely(copied < len && !folio_test_uptodate(folio)))
return false;
iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
- iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied);
- filemap_dirty_folio(inode->i_mapping, folio);
+ iomap_dirty_folio_range(inode->i_mapping, folio,
+ offset_in_folio(folio, pos), copied);
return true;
}
@@ -1613,6 +1689,29 @@ void iomap_start_folio_write(struct inode *inode, struct folio *folio,
}
EXPORT_SYMBOL_GPL(iomap_start_folio_write);
+static void iomap_folio_start_writeback(struct folio *folio)
+{
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (!iomap_granular_dirty_pages(folio))
+ return folio_start_writeback(folio);
+
+ __folio_start_writeback(folio, false, ifs->nr_pages_writeback);
+}
+
+static void iomap_folio_end_writeback(struct folio *folio)
+{
+ struct iomap_folio_state *ifs = folio->private;
+ long nr_pages_writeback;
+
+ if (!iomap_granular_dirty_pages(folio))
+ return folio_end_writeback(folio);
+
+ nr_pages_writeback = ifs->nr_pages_writeback;
+ ifs->nr_pages_writeback = 0;
+ folio_end_writeback_pages(folio, nr_pages_writeback);
+}
+
void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
size_t len)
{
@@ -1622,7 +1721,7 @@ void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0);
if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending))
- folio_end_writeback(folio);
+ iomap_folio_end_writeback(folio);
}
EXPORT_SYMBOL_GPL(iomap_finish_folio_write);
@@ -1710,6 +1809,21 @@ static bool iomap_writeback_handle_eof(struct folio *folio, struct inode *inode,
return true;
}
+static void iomap_update_dirty_stats(struct folio *folio)
+{
+ struct iomap_folio_state *ifs = folio->private;
+ long nr_dirty_pages;
+
+ if (iomap_granular_dirty_pages(folio)) {
+ nr_dirty_pages = ifs_count_dirty_pages(folio);
+ ifs->nr_pages_writeback = nr_dirty_pages;
+ } else {
+ nr_dirty_pages = folio_nr_pages(folio);
+ }
+
+ clear_dirty_for_io_stats(folio, nr_dirty_pages);
+}
+
int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
{
struct iomap_folio_state *ifs = folio->private;
@@ -1727,6 +1841,8 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
trace_iomap_writeback_folio(inode, pos, folio_size(folio));
+ iomap_update_dirty_stats(folio);
+
if (!iomap_writeback_handle_eof(folio, inode, &end_pos))
return 0;
WARN_ON_ONCE(end_pos <= pos);
@@ -1734,6 +1850,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
if (i_blocks_per_folio(inode, folio) > 1) {
if (!ifs) {
ifs = ifs_alloc(inode, folio, 0);
+ ifs->nr_pages_writeback = folio_nr_pages(folio);
iomap_set_range_dirty(folio, 0, end_pos - pos);
}
@@ -1751,7 +1868,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
* Set the writeback bit ASAP, as the I/O completion for the single
* block per folio case happen hit as soon as we're submitting the bio.
*/
- folio_start_writeback(folio);
+ iomap_folio_start_writeback(folio);
/*
* Walk through the folio to find dirty areas to write back.
@@ -1784,10 +1901,10 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
*/
if (ifs) {
if (atomic_dec_and_test(&ifs->write_bytes_pending))
- folio_end_writeback(folio);
+ iomap_folio_end_writeback(folio);
} else {
if (!wb_pending)
- folio_end_writeback(folio);
+ iomap_folio_end_writeback(folio);
}
mapping_set_error(inode->i_mapping, error);
return error;
@@ -1809,6 +1926,13 @@ iomap_writepages(struct iomap_writepage_ctx *wpc)
PF_MEMALLOC))
return -EIO;
+ /*
+ * iomap opts out of the default wbc stats accounting because it does
+ * its own granular dirty/writeback accounting (see
+ * iomap_update_dirty_stats()).
+ */
+ wpc->wbc->no_stats_accounting = true;
+
while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) {
error = iomap_writeback_folio(wpc, folio);
folio_unlock(folio);
--
2.47.3