From: Brian Foster <bfoster@redhat.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH RFCv2 2/4] iomap: optional zero range dirty folio processing
Date: Fri, 13 Dec 2024 10:05:26 -0500 [thread overview]
Message-ID: <20241213150528.1003662-3-bfoster@redhat.com> (raw)
In-Reply-To: <20241213150528.1003662-1-bfoster@redhat.com>
The only way zero range can currently process unwritten mappings
with dirty pagecache is to check whether the range is dirty before
mapping lookup and then flush when at least one underlying mapping
is unwritten. This ordering is required to prevent iomap lookup from
racing with folio writeback and reclaim.
Since zero range can skip ranges of unwritten mappings that are
clean in cache, this operation can be improved by allowing the
filesystem to provide the set of folios backed by such mappings that
require zeroing. In turn, rather than flush or iterate file
offsets, zero range can process each folio as normal and skip any
clean or uncached ranges in between.
As a first pass prototype solution, stuff a folio_batch pointer in
struct iomap_iter, provide a helper that the fs can use to populate
the batch at lookup time, and treat a non-NULL batch pointer as the
indication that the mapping was checked.
Note that since the helper is intended for use under internal fs
locks, it trylocks folios in order to filter out clean folios. This
loosely follows the logic from filemap_range_has_writeback().
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/iomap/buffered-io.c | 77 ++++++++++++++++++++++++++++++++++++++++--
fs/iomap/iter.c | 6 ++++
include/linux/iomap.h | 4 +++
3 files changed, 84 insertions(+), 3 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 7fdf593b58b1..5492dc7fe963 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -751,6 +751,15 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len)
if (!mapping_large_folio_support(iter->inode->i_mapping))
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
+ if (iter->fbatch) {
+ struct folio *folio = folio_batch_next(iter->fbatch);
+ if (folio) {
+ folio_get(folio);
+ folio_lock(folio);
+ }
+ return folio;
+ }
+
if (folio_ops && folio_ops->get_folio)
return folio_ops->get_folio(iter, pos, len);
else
@@ -839,6 +848,15 @@ static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop,
}
}
+ if (!folio) {
+ WARN_ON_ONCE(!iter->fbatch);
+ len = 0;
+ goto out;
+ } else if (folio_pos(folio) > iter->pos) {
+ BUG_ON(folio_pos(folio) - iter->pos >= iomap_length(iter));
+ iomap_iter_advance(iter, folio_pos(folio) - iter->pos);
+ }
+
pos = iomap_trim_folio_range(iter, folio, poffset, &len);
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap)
@@ -854,6 +872,7 @@ static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop,
if (unlikely(status))
goto out_unlock;
+out:
*foliop = folio;
*plen = len;
return 0;
@@ -1374,6 +1393,11 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
if (iter->iomap.flags & IOMAP_F_STALE)
break;
+ if (!folio) {
+ iomap_iter_advance(iter, iomap_length(iter));
+ break;
+ }
+
/* warn about zeroing folios beyond eof that won't write back */
WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
@@ -1393,6 +1417,49 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
return 0;
}
+/**
+ * iomap_fill_dirty_folios - collect dirty or writeback folios for an iter
+ * @iter: iteration whose ->fbatch is allocated and populated here
+ * @offset: start of the file range to scan, in bytes
+ * @length: length of the file range to scan, in bytes
+ *
+ * Look up the pagecache folios of @iter->inode that cover
+ * [@offset, @offset + @length) and add each one that is dirty or under
+ * writeback to a newly allocated @iter->fbatch, taking an extra reference
+ * per added folio.  A folio that cannot be trylocked is added as well because
+ * its state could not be sampled.  The batch is released and freed by
+ * iomap_iter_reset_iomap().
+ *
+ * Return: @offset + @length if the scan ran to the end of the range or the
+ * batch could not be allocated (@iter->fbatch is NULL in the latter case),
+ * otherwise the file position just past the folio that filled the batch.
+ */
+loff_t
+iomap_fill_dirty_folios(
+	struct iomap_iter	*iter,
+	loff_t			offset,
+	loff_t			length)
+{
+	struct address_space	*mapping = iter->inode->i_mapping;
+	struct folio_batch	fbatch;
+	loff_t			end_pos = offset + length;
+	pgoff_t			start = offset >> PAGE_SHIFT;
+	pgoff_t			end = (end_pos - 1) >> PAGE_SHIFT;
+
+	folio_batch_init(&fbatch);
+	iter->fbatch = kmalloc(sizeof(*iter->fbatch), GFP_KERNEL);
+	if (!iter->fbatch)
+		return end_pos;
+	folio_batch_init(iter->fbatch);
+
+	/*
+	 * Check for room in the destination batch before looking up more
+	 * folios.  With the checks the other way around, a lookup done after
+	 * the destination filled up would leave the loop without the
+	 * folio_batch_release() below ever running on the freshly populated
+	 * local batch, leaking those folio references.
+	 */
+	while (folio_batch_space(iter->fbatch) &&
+	       filemap_get_folios(mapping, &start, end, &fbatch)) {
+		struct folio	*folio;
+
+		while ((folio = folio_batch_next(&fbatch))) {
+			/*
+			 * Only a folio we managed to lock and found neither
+			 * dirty nor under writeback is skipped; a contended
+			 * folio is kept.
+			 */
+			if (folio_trylock(folio)) {
+				bool clean = !folio_test_dirty(folio) &&
+					     !folio_test_writeback(folio);
+				folio_unlock(folio);
+				if (clean)
+					continue;
+			}
+
+			/* reference owned by iter->fbatch from here on */
+			folio_get(folio);
+			if (!folio_batch_add(iter->fbatch, folio)) {
+				/* batch is full: report how far we got */
+				end_pos = folio_pos(folio) + folio_size(folio);
+				break;
+			}
+		}
+		/* drop the lookup references on everything in the local batch */
+		folio_batch_release(&fbatch);
+	}
+
+	return end_pos;
+}
+EXPORT_SYMBOL_GPL(iomap_fill_dirty_folios);
+
int
iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
const struct iomap_ops *ops)
@@ -1420,7 +1487,7 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
* flushing on partial eof zeroing, special case it to zero the
* unaligned start portion if already dirty in pagecache.
*/
- if (off &&
+ if (!iter.fbatch && off &&
filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
iter.len = plen;
while ((ret = iomap_iter(&iter, ops)) > 0)
@@ -1441,8 +1508,12 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
while ((ret = iomap_iter(&iter, ops)) > 0) {
const struct iomap *srcmap = iomap_iter_srcmap(&iter);
- if (srcmap->type == IOMAP_HOLE ||
- srcmap->type == IOMAP_UNWRITTEN) {
+ if (WARN_ON_ONCE(iter.fbatch && srcmap->type != IOMAP_UNWRITTEN))
+ return -EIO;
+
+ if (!iter.fbatch &&
+ (srcmap->type == IOMAP_HOLE ||
+ srcmap->type == IOMAP_UNWRITTEN)) {
loff_t proc = iomap_length(&iter);
if (range_dirty) {
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index 5fe0edb51fe5..911846d7386c 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -9,6 +9,12 @@
static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
{
+ if (iter->fbatch) {
+ folio_batch_release(iter->fbatch);
+ kfree(iter->fbatch);
+ iter->fbatch = NULL;
+ }
+
iter->processed = 0;
memset(&iter->iomap, 0, sizeof(iter->iomap));
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 704ed98159f7..d01e5265de27 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/mm_types.h>
#include <linux/blkdev.h>
+#include <linux/pagevec.h>
struct address_space;
struct fiemap_extent_info;
@@ -228,6 +229,7 @@ struct iomap_iter {
unsigned flags;
struct iomap iomap;
struct iomap srcmap;
+ struct folio_batch *fbatch;
void *private;
};
@@ -315,6 +317,8 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops);
+loff_t iomap_fill_dirty_folios(struct iomap_iter *iter, loff_t offset,
+ loff_t length);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
bool *did_zero, const struct iomap_ops *ops);
int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
--
2.47.0
next prev parent reply other threads:[~2024-12-13 15:03 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-13 15:05 [PATCH RFCv2 0/4] iomap: zero range folio batch processing prototype Brian Foster
2024-12-13 15:05 ` [PATCH RFCv2 1/4] iomap: prep work for folio_batch support Brian Foster
2024-12-13 15:05 ` Brian Foster [this message]
2025-01-09 7:20 ` [PATCH RFCv2 2/4] iomap: optional zero range dirty folio processing Christoph Hellwig
2025-01-10 17:53 ` Brian Foster
2025-01-13 4:51 ` Christoph Hellwig
2025-01-13 14:32 ` Brian Foster
2025-01-15 5:47 ` Christoph Hellwig
2025-01-16 14:14 ` Brian Foster
2024-12-13 15:05 ` [PATCH RFCv2 3/4] xfs: always trim mapping to requested range for zero range Brian Foster
2025-01-09 7:22 ` Christoph Hellwig
2024-12-13 15:05 ` [PATCH RFCv2 4/4] xfs: fill dirty folios on zero range of unwritten mappings Brian Foster
2025-01-09 7:26 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241213150528.1003662-3-bfoster@redhat.com \
--to=bfoster@redhat.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox