* [PATCH 1/2] btrfs: move the delalloc range bitmap search into extent_io.c
2024-09-15 23:11 [PATCH 0/2] btrfs: de-couple subpage locked and delalloc range Qu Wenruo
@ 2024-09-15 23:11 ` Qu Wenruo
2024-09-15 23:11 ` [PATCH 2/2] btrfs: mark all dirty sectors as locked inside writepage_delalloc() Qu Wenruo
1 sibling, 0 replies; 3+ messages in thread
From: Qu Wenruo @ 2024-09-15 23:11 UTC (permalink / raw)
To: linux-btrfs
Currently for subpage (sector size < page size) cases, we reuse subpage
locked bitmap to find out all delalloc ranges we have locked, and run
all those found ranges.
However such reuse is not perfect, e.g.:
0 32K 64K 96K 128K
| |////////||///////| |////|
120K
For above range, writepage_delalloc() for page 0 will handle the range
[32K, 96k), note delalloc range can be beyond the page boundary.
But writepage_delalloc() for page 64K will only handle range [120K,
128K), as the previous run on page 0 has already handled range [64K,
96K).
Meanwhile for the writeback we should expect range [64K, 96K) to also be
locked, this leads to the mismatch from locked bitmap and delalloc
range.
This is not causing problems yet, but it's still an inconsistent
behavior.
So instead of relying on the subpage locked bitmap, move the delalloc
range search using local @delalloc_bitmap, so that we can remove the
existing btrfs_folio_find_writer_locked().
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent_io.c | 44 ++++++++++++++++++++++++++++++++++++++++-
fs/btrfs/subpage.c | 47 --------------------------------------------
fs/btrfs/subpage.h | 4 ----
3 files changed, 43 insertions(+), 52 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 982bb469046f..1210e24b6277 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1101,6 +1101,45 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
return ret;
}
+static void set_delalloc_bitmap(struct folio *folio, unsigned long *delalloc_bitmap,
+ u64 start, u32 len)
+{
+ struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
+ const u64 folio_start = folio_pos(folio);
+ unsigned int start_bit;
+ unsigned int nbits;
+
+ ASSERT(start >= folio_start && start + len <= folio_start + PAGE_SIZE);
+ start_bit = (start - folio_start) >> fs_info->sectorsize_bits;
+ nbits = len >> fs_info->sectorsize_bits;
+ ASSERT(bitmap_test_range_all_zero(delalloc_bitmap, start_bit, nbits));
+ bitmap_set(delalloc_bitmap, start_bit, nbits);
+}
+
+static bool find_next_delalloc_bitmap(struct folio *folio,
+ unsigned long *delalloc_bitmap, u64 start,
+ u64 *found_start, u32 *found_len)
+{
+ struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
+ const u64 folio_start = folio_pos(folio);
+ const unsigned int bitmap_size = fs_info->sectors_per_page;
+ unsigned int start_bit;
+ unsigned int first_zero;
+ unsigned int first_set;
+
+ ASSERT(start >= folio_start && start < folio_start + PAGE_SIZE);
+
+ start_bit = (start - folio_start) >> fs_info->sectorsize_bits;
+ first_set = find_next_bit(delalloc_bitmap, bitmap_size, start_bit);
+ if (first_set >= bitmap_size)
+ return false;
+
+ *found_start = folio_start + (first_set << fs_info->sectorsize_bits);
+ first_zero = find_next_zero_bit(delalloc_bitmap, bitmap_size, first_set);
+ *found_len = (first_zero - first_set) << fs_info->sectorsize_bits;
+ return true;
+}
+
/*
* helper for extent_writepage(), doing all of the delayed allocation setup.
*
@@ -1120,6 +1159,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
const u64 page_start = folio_pos(folio);
const u64 page_end = page_start + folio_size(folio) - 1;
+ unsigned long delalloc_bitmap = 0;
/*
* Save the last found delalloc end. As the delalloc end can go beyond
* page boundary, thus we cannot rely on subpage bitmap to locate the
@@ -1147,6 +1187,8 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
delalloc_start = delalloc_end + 1;
continue;
}
+ set_delalloc_bitmap(folio, &delalloc_bitmap, delalloc_start,
+ min(delalloc_end, page_end) + 1 - delalloc_start);
btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start,
min(delalloc_end, page_end) + 1 -
delalloc_start);
@@ -1174,7 +1216,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
found_len = last_delalloc_end + 1 - found_start;
found = true;
} else {
- found = btrfs_subpage_find_writer_locked(fs_info, folio,
+ found = find_next_delalloc_bitmap(folio, &delalloc_bitmap,
delalloc_start, &found_start, &found_len);
}
if (!found)
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 8a8724c4a0a1..bb02d2ef899f 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -801,53 +801,6 @@ void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
spin_unlock_irqrestore(&subpage->lock, flags);
}
-/*
- * Find any subpage writer locked range inside @folio, starting at file offset
- * @search_start. The caller should ensure the folio is locked.
- *
- * Return true and update @found_start_ret and @found_len_ret to the first
- * writer locked range.
- * Return false if there is no writer locked range.
- */
-bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 search_start,
- u64 *found_start_ret, u32 *found_len_ret)
-{
- struct btrfs_subpage *subpage = folio_get_private(folio);
- const u32 sectors_per_page = fs_info->sectors_per_page;
- const unsigned int len = PAGE_SIZE - offset_in_page(search_start);
- const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
- locked, search_start, len);
- const unsigned int locked_bitmap_start = sectors_per_page * btrfs_bitmap_nr_locked;
- const unsigned int locked_bitmap_end = locked_bitmap_start + sectors_per_page;
- unsigned long flags;
- int first_zero;
- int first_set;
- bool found = false;
-
- ASSERT(folio_test_locked(folio));
- spin_lock_irqsave(&subpage->lock, flags);
- first_set = find_next_bit(subpage->bitmaps, locked_bitmap_end, start_bit);
- if (first_set >= locked_bitmap_end)
- goto out;
-
- found = true;
-
- *found_start_ret = folio_pos(folio) +
- ((first_set - locked_bitmap_start) << fs_info->sectorsize_bits);
- /*
- * Since @first_set is ensured to be smaller than locked_bitmap_end
- * here, @found_start_ret should be inside the folio.
- */
- ASSERT(*found_start_ret < folio_pos(folio) + PAGE_SIZE);
-
- first_zero = find_next_zero_bit(subpage->bitmaps, locked_bitmap_end, first_set);
- *found_len_ret = (first_zero - first_set) << fs_info->sectorsize_bits;
-out:
- spin_unlock_irqrestore(&subpage->lock, flags);
- return found;
-}
-
#define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \
{ \
const int sectors_per_page = fs_info->sectors_per_page; \
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 4b85d91d0e18..d16fbddeda68 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -108,10 +108,6 @@ void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, unsigned long bitmap);
-bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
- struct folio *folio, u64 search_start,
- u64 *found_start_ret, u32 *found_len_ret);
-
/*
* Template for subpage related operations.
*
--
2.46.0
^ permalink raw reply related [flat|nested] 3+ messages in thread* [PATCH 2/2] btrfs: mark all dirty sectors as locked inside writepage_delalloc()
2024-09-15 23:11 [PATCH 0/2] btrfs: de-couple subpage locked and delalloc range Qu Wenruo
2024-09-15 23:11 ` [PATCH 1/2] btrfs: move the delalloc range bitmap search into extent_io.c Qu Wenruo
@ 2024-09-15 23:11 ` Qu Wenruo
1 sibling, 0 replies; 3+ messages in thread
From: Qu Wenruo @ 2024-09-15 23:11 UTC (permalink / raw)
To: linux-btrfs
Currently we only mark sectors as locked if there is a *NEW* delalloc
range for it.
But NEW delalloc range is not the same as dirty sectors we want to
submit, e.g:
0 32K 64K 96K 128K
| |////////||///////| |////|
120K
For above 64K page size case, writepage_delalloc() for page 0 will find
and lock the delalloc range [32K, 96K), which is beyond the page
boundary.
Then when writepage_delalloc() is called for the page 64K, since [64K,
96K) is already locked, only [120K, 128K) will be locked.
This means, although range [64K, 96K) is dirty and will be submitted
later by extent_writepage_io(), it will not be marked as locked.
This is fine for now, as we call btrfs_folio_end_writer_lock_bitmap() to
free every non-compressed sector, and compression is only allowed for
full page range.
But this is not safe for future sector perfect compression support, as
this can lead to double folio unlock:
Thread A | Thread B
---------------------------------------+--------------------------------
| submit_one_async_extent()
| |- extent_clear_unlock_delalloc()
extent_writepage() | |- btrfs_folio_end_writer_lock()
|- btrfs_folio_edn_writer_lock_bitmap()| |- btrfs_subpage_end_and_test_writer()
| | | |- atomic_sub_and_test()
| | | /* Now the atomic value is 0 */
|- if (atomic_read() == 0) | |
|- folio_unlock() | |- folio_unlock()
The root cause is the above range [64K, 96K) is dirtied and should also
be locked but it isn't.
So to make everything more consistent and prepare for the incoming
sector perfect compression, mark all dirty sectors as locked.
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent_io.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1210e24b6277..1041a7060b2a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1170,6 +1170,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
u64 delalloc_end = page_end;
u64 delalloc_to_write = 0;
int ret = 0;
+ int bit;
/* Save the dirty bitmap as our submission bitmap will be a subset of it. */
if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
@@ -1179,6 +1180,12 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
bio_ctrl->submit_bitmap = 1;
}
+ for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
+ u64 start = page_start + (bit << fs_info->sectorsize_bits);
+
+ btrfs_folio_set_writer_lock(fs_info, folio, start, fs_info->sectorsize);
+ }
+
/* Lock all (subpage) delalloc ranges inside the folio first. */
while (delalloc_start < page_end) {
delalloc_end = page_end;
@@ -1189,9 +1196,6 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
}
set_delalloc_bitmap(folio, &delalloc_bitmap, delalloc_start,
min(delalloc_end, page_end) + 1 - delalloc_start);
- btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start,
- min(delalloc_end, page_end) + 1 -
- delalloc_start);
last_delalloc_end = delalloc_end;
delalloc_start = delalloc_end + 1;
}
--
2.46.0
^ permalink raw reply related [flat|nested] 3+ messages in thread