* [PATCH 1/7] btrfs: Use percpu refcounting for block groups
[not found] <20260112161549.2786827-1-martin@urbackup.org>
@ 2026-01-12 16:17 ` Martin Raiber
2026-01-12 22:32 ` Boris Burkov
2026-01-14 6:06 ` kernel test robot
2026-01-12 16:17 ` [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem Martin Raiber
` (5 subsequent siblings)
6 siblings, 2 replies; 15+ messages in thread
From: Martin Raiber @ 2026-01-12 16:17 UTC (permalink / raw)
To: linux-btrfs; +Cc: Martin Raiber
Use a percpu reference counter to keep track of the block group refs.
This avoids cross-CPU synchronization entirely as long as the main
reference has not been dropped via btrfs_remove_block_group,
significantly improving the performance of btrfs_get_block_group and
btrfs_put_block_group.
Signed-off-by: Martin Raiber <martin@urbackup.org>
---
fs/btrfs/block-group.c | 111 +++++++++++++++++++++++------------------
fs/btrfs/block-group.h | 2 +-
2 files changed, 63 insertions(+), 50 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index a1119f06b6d1..7569438ccbd5 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -153,37 +153,44 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
void btrfs_get_block_group(struct btrfs_block_group *cache)
{
- refcount_inc(&cache->refs);
+ percpu_ref_get(&cache->refs);
}
-void btrfs_put_block_group(struct btrfs_block_group *cache)
+static void btrfs_free_block_group(struct percpu_ref *ref)
{
- if (refcount_dec_and_test(&cache->refs)) {
- WARN_ON(cache->pinned > 0);
- /*
- * If there was a failure to cleanup a log tree, very likely due
- * to an IO failure on a writeback attempt of one or more of its
- * extent buffers, we could not do proper (and cheap) unaccounting
- * of their reserved space, so don't warn on reserved > 0 in that
- * case.
- */
- if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
- !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
- WARN_ON(cache->reserved > 0);
+ struct btrfs_block_group *cache =
+ container_of(ref, struct btrfs_block_group, refs);
- /*
- * A block_group shouldn't be on the discard_list anymore.
- * Remove the block_group from the discard_list to prevent us
- * from causing a panic due to NULL pointer dereference.
- */
- if (WARN_ON(!list_empty(&cache->discard_list)))
- btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
- cache);
+ WARN_ON(cache->pinned > 0);
+ /*
+ * If there was a failure to cleanup a log tree, very likely due
+ * to an IO failure on a writeback attempt of one or more of its
+ * extent buffers, we could not do proper (and cheap) unaccounting
+ * of their reserved space, so don't warn on reserved > 0 in that
+ * case.
+ */
+ if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+ !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
+ WARN_ON(cache->reserved > 0);
- kfree(cache->free_space_ctl);
- btrfs_free_chunk_map(cache->physical_map);
- kfree(cache);
- }
+ /*
+ * A block_group shouldn't be on the discard_list anymore.
+ * Remove the block_group from the discard_list to prevent us
+ * from causing a panic due to NULL pointer dereference.
+ */
+ if (WARN_ON(!list_empty(&cache->discard_list)))
+ btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
+ cache);
+
+ percpu_ref_exit(&cache->refs);
+ kfree(cache->free_space_ctl);
+ btrfs_free_chunk_map(cache->physical_map);
+ kfree(cache);
+}
+
+void btrfs_put_block_group(struct btrfs_block_group *cache)
+{
+ percpu_ref_put(&cache->refs);
}
static int btrfs_bg_start_cmp(const struct rb_node *new,
@@ -406,8 +413,8 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg)
* on the groups' semaphore is held and decremented after releasing
* the read access on that semaphore and creating the ordered extent.
*/
- down_write(&space_info->groups_sem);
- up_write(&space_info->groups_sem);
+ percpu_down_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
}
@@ -1012,7 +1019,7 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
struct btrfs_space_info *sinfo;
list_for_each_entry_rcu(sinfo, head, list) {
- down_read(&sinfo->groups_sem);
+ percpu_down_read(&sinfo->groups_sem);
if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
found_raid56 = true;
if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
@@ -1021,7 +1028,7 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
found_raid1c34 = true;
if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C4]))
found_raid1c34 = true;
- up_read(&sinfo->groups_sem);
+ percpu_up_read(&sinfo->groups_sem);
}
if (!found_raid56)
btrfs_clear_fs_incompat(fs_info, RAID56);
@@ -1159,11 +1166,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
RB_CLEAR_NODE(&block_group->cache_node);
/* Once for the block groups rbtree */
- btrfs_put_block_group(block_group);
+ percpu_ref_kill(&block_group->refs);
write_unlock(&fs_info->block_group_cache_lock);
- down_write(&block_group->space_info->groups_sem);
+ percpu_down_write(&block_group->space_info->groups_sem);
/*
* we must use list_del_init so people can check to see if they
* are still on the list after taking the semaphore
@@ -1174,7 +1181,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
block_group->space_info->block_group_kobjs[index] = NULL;
clear_avail_alloc_bits(fs_info, block_group->flags);
}
- up_write(&block_group->space_info->groups_sem);
+ percpu_up_write(&block_group->space_info->groups_sem);
clear_incompat_bg_bits(fs_info, block_group->flags);
if (kobj) {
kobject_del(kobj);
@@ -1544,7 +1551,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
/* Don't want to race with allocators so take the groups_sem */
- down_write(&space_info->groups_sem);
+ percpu_down_write(&space_info->groups_sem);
/*
* Async discard moves the final block group discard to be prior
@@ -1554,7 +1561,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
!btrfs_is_free_space_trimmed(block_group)) {
trace_btrfs_skip_unused_block_group(block_group);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
/* Requeue if we failed because of async discard */
btrfs_discard_queue_work(&fs_info->discard_ctl,
block_group);
@@ -1581,7 +1588,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
goto next;
}
@@ -1618,7 +1625,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
goto next;
}
@@ -1627,7 +1634,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
/* We don't want to force the issue, only flip if it's ok. */
ret = inc_block_group_ro(block_group, 0);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
if (ret < 0) {
ret = 0;
goto next;
@@ -1882,7 +1889,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
spin_unlock(&fs_info->unused_bgs_lock);
/* Don't race with allocators so take the groups_sem */
- down_write(&space_info->groups_sem);
+ percpu_down_write(&space_info->groups_sem);
spin_lock(&space_info->lock);
spin_lock(&bg->lock);
@@ -1895,7 +1902,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/
spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
goto next;
}
if (bg->used == 0) {
@@ -1914,7 +1921,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
btrfs_mark_bg_unused(bg);
spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
goto next;
}
@@ -1931,7 +1938,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!should_reclaim_block_group(bg, bg->length)) {
spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
goto next;
}
@@ -1947,12 +1954,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
* never gets back to read-write to let us reclaim again.
*/
if (btrfs_need_cleaner_sleep(fs_info)) {
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
goto next;
}
ret = inc_block_group_ro(bg, 0);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
if (ret < 0)
goto next;
@@ -2288,7 +2295,12 @@ static struct btrfs_block_group *btrfs_create_block_group(
cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
- refcount_set(&cache->refs, 1);
+ if (percpu_ref_init(&cache->refs, btrfs_free_block_group,
+ 0, GFP_NOFS)) {
+ kfree(cache->free_space_ctl);
+ kfree(cache);
+ return NULL;
+ }
spin_lock_init(&cache->lock);
init_rwsem(&cache->data_rwsem);
INIT_LIST_HEAD(&cache->list);
@@ -4550,9 +4562,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
RB_CLEAR_NODE(&block_group->cache_node);
write_unlock(&info->block_group_cache_lock);
- down_write(&block_group->space_info->groups_sem);
+ percpu_down_write(&block_group->space_info->groups_sem);
list_del(&block_group->list);
- up_write(&block_group->space_info->groups_sem);
+ percpu_up_write(&block_group->space_info->groups_sem);
/*
* We haven't cached this block group, which means we could
@@ -4567,9 +4579,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
ASSERT(list_empty(&block_group->dirty_list));
ASSERT(list_empty(&block_group->io_list));
ASSERT(list_empty(&block_group->bg_list));
- ASSERT(refcount_read(&block_group->refs) == 1);
+ ASSERT(!percpu_ref_is_zero(&block_group->refs));
ASSERT(block_group->swap_extents == 0);
- btrfs_put_block_group(block_group);
+ percpu_ref_kill(&block_group->refs);
+ ASSERT(percpu_ref_is_zero(&block_group->refs));
write_lock(&info->block_group_cache_lock);
}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 5f933455118c..d44675f9d601 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -178,7 +178,7 @@ struct btrfs_block_group {
/* For block groups in the same raid type */
struct list_head list;
- refcount_t refs;
+ struct percpu_ref refs;
/*
* List of struct btrfs_free_clusters for this block group.
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem
[not found] <20260112161549.2786827-1-martin@urbackup.org>
2026-01-12 16:17 ` [PATCH 1/7] btrfs: Use percpu refcounting for block groups Martin Raiber
@ 2026-01-12 16:17 ` Martin Raiber
2026-01-12 18:58 ` [PATCH v2 " Martin Raiber
2026-01-12 22:58 ` [PATCH " Boris Burkov
2026-01-12 16:17 ` [PATCH 3/7] btrfs: Don't lock data_rwsem if space cache v1 is not used Martin Raiber
` (4 subsequent siblings)
6 siblings, 2 replies; 15+ messages in thread
From: Martin Raiber @ 2026-01-12 16:17 UTC (permalink / raw)
To: linux-btrfs; +Cc: Martin Raiber
groups_sem is taken for write almost exclusively when adding or
removing block groups, whereas it is taken for read constantly
by multiple CPUs.
Convert it to a percpu rw_semaphore to significantly
improve the performance of find_free_extent.
Signed-off-by: Martin Raiber <martin@urbackup.org>
---
fs/btrfs/extent-tree.c | 8 ++++----
fs/btrfs/ioctl.c | 8 ++++----
fs/btrfs/space-info.c | 29 +++++++++++++++++++----------
fs/btrfs/space-info.h | 2 +-
fs/btrfs/sysfs.c | 9 +++++----
fs/btrfs/zoned.c | 11 +++++------
6 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1dcd69fe97ed..ce2eef069663 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4442,7 +4442,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
block_group->space_info == space_info &&
block_group->cached != BTRFS_CACHE_NO) {
- down_read(&space_info->groups_sem);
+ percpu_down_read(&space_info->groups_sem);
if (list_empty(&block_group->list) ||
block_group->ro) {
/*
@@ -4452,7 +4452,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
* valid
*/
btrfs_put_block_group(block_group);
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
} else {
ffe_ctl->index = btrfs_bg_flags_to_raid_index(
block_group->flags);
@@ -4471,7 +4471,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
ffe_ctl->index == 0)
full_search = true;
- down_read(&space_info->groups_sem);
+ percpu_down_read(&space_info->groups_sem);
list_for_each_entry(block_group,
&space_info->block_groups[ffe_ctl->index], list) {
struct btrfs_block_group *bg_ret;
@@ -4609,7 +4609,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
cond_resched();
}
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, space_info,
full_search);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d9e7dd317670..73ff0efc0381 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2940,12 +2940,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
if (!info)
continue;
- down_read(&info->groups_sem);
+ percpu_down_read(&info->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
if (!list_empty(&info->block_groups[c]))
slot_count++;
}
- up_read(&info->groups_sem);
+ percpu_up_read(&info->groups_sem);
}
/*
@@ -2992,7 +2992,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
if (!info)
continue;
- down_read(&info->groups_sem);
+ percpu_down_read(&info->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
if (!list_empty(&info->block_groups[c])) {
get_block_group_info(&info->block_groups[c],
@@ -3005,7 +3005,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
if (!slot_count)
break;
}
- up_read(&info->groups_sem);
+ percpu_up_read(&info->groups_sem);
}
/*
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 857e4fd2c77e..ddedeccbdade 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -234,13 +234,14 @@ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
WRITE_ONCE(space_info->chunk_size, chunk_size);
}
-static void init_space_info(struct btrfs_fs_info *info,
+static int init_space_info(struct btrfs_fs_info *info,
struct btrfs_space_info *space_info, u64 flags)
{
space_info->fs_info = info;
for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&space_info->block_groups[i]);
- init_rwsem(&space_info->groups_sem);
+ if (percpu_init_rwsem(&space_info->groups_sem))
+ return -ENOMEM;
spin_lock_init(&space_info->lock);
space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
@@ -253,6 +254,8 @@ static void init_space_info(struct btrfs_fs_info *info,
if (btrfs_is_zoned(info))
space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
+
+ return 0;
}
static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flags,
@@ -270,7 +273,10 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
if (!sub_group)
return -ENOMEM;
- init_space_info(fs_info, sub_group, flags);
+ if (init_space_info(fs_info, sub_group, flags)) {
+ kfree(sub_group);
+ return -ENOMEM;
+ }
parent->sub_group[index] = sub_group;
sub_group->parent = parent;
sub_group->subgroup_id = id;
@@ -293,7 +299,10 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
if (!space_info)
return -ENOMEM;
- init_space_info(info, space_info, flags);
+ if (init_space_info(info, space_info, flags)) {
+ kfree(space_info);
+ return -ENOMEM;
+ }
if (btrfs_is_zoned(info)) {
if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -384,9 +393,9 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
block_group->space_info = space_info;
index = btrfs_bg_flags_to_raid_index(block_group->flags);
- down_write(&space_info->groups_sem);
+ percpu_down_write(&space_info->groups_sem);
list_add_tail(&block_group->list, &space_info->block_groups[index]);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
}
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
@@ -650,7 +659,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
if (!dump_block_groups)
return;
- down_read(&info->groups_sem);
+ percpu_down_read(&info->groups_sem);
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
u64 avail;
@@ -670,7 +679,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
}
if (++index < BTRFS_NR_RAID_TYPES)
goto again;
- up_read(&info->groups_sem);
+ percpu_up_read(&info->groups_sem);
btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
}
@@ -2095,7 +2104,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
thresh_pct = btrfs_calc_reclaim_threshold(space_info);
spin_unlock(&space_info->lock);
- down_read(&space_info->groups_sem);
+ percpu_down_read(&space_info->groups_sem);
again:
list_for_each_entry(bg, &space_info->block_groups[raid], list) {
u64 thresh;
@@ -2127,7 +2136,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
goto again;
}
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
}
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 0703f24b23f7..f99624069391 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -175,7 +175,7 @@ struct btrfs_space_info {
*/
u64 tickets_id;
- struct rw_semaphore groups_sem;
+ struct percpu_rw_semaphore groups_sem;
/* for block groups in our same type */
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index ebd6d1d6778b..ccec9eb1fa4f 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -701,14 +701,14 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
u64 val = 0;
- down_read(&sinfo->groups_sem);
+ percpu_down_read(&sinfo->groups_sem);
list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
val += block_group->length;
else
val += block_group->used;
}
- up_read(&sinfo->groups_sem);
+ percpu_up_read(&sinfo->groups_sem);
return sysfs_emit(buf, "%llu\n", val);
}
@@ -816,7 +816,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
u32 large = 0;
for (int i = 0; i < BTRFS_NR_RAID_TYPES; ++i) {
- down_read(&sinfo->groups_sem);
+ percpu_down_read(&sinfo->groups_sem);
list_for_each_entry(bg, &sinfo->block_groups[i], list) {
if (!btrfs_block_group_should_use_size_class(bg))
continue;
@@ -835,7 +835,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
break;
}
}
- up_read(&sinfo->groups_sem);
+ percpu_up_read(&sinfo->groups_sem);
}
return sysfs_emit(buf, "none %u\n"
"small %u\n"
@@ -1046,6 +1046,7 @@ ATTRIBUTE_GROUPS(space_info);
static void space_info_release(struct kobject *kobj)
{
struct btrfs_space_info *sinfo = to_space_info(kobj);
+ percpu_free_rwsem(&sinfo->groups_sem);
kfree(sinfo);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 2e861eef5cd8..da92b0d38a1b 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2588,12 +2588,11 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
"reloc_sinfo->subgroup_id=%d", reloc_sinfo->subgroup_id);
factor = btrfs_bg_type_to_factor(bg->flags);
- down_write(&space_info->groups_sem);
+ percpu_down_write(&space_info->groups_sem);
list_del_init(&bg->list);
/* We can assume this as we choose the second empty one. */
ASSERT(!list_empty(&space_info->block_groups[index]));
- up_write(&space_info->groups_sem);
-
+ percpu_up_write(&space_info->groups_sem);
spin_lock(&space_info->lock);
space_info->total_bytes -= bg->length;
space_info->disk_total -= bg->length * factor;
@@ -2771,7 +2770,7 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
int ret;
bool need_finish = false;
- down_read(&space_info->groups_sem);
+ percpu_down_read(&space_info->groups_sem);
for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) {
list_for_each_entry(bg, &space_info->block_groups[index],
list) {
@@ -2786,14 +2785,14 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
spin_unlock(&bg->lock);
if (btrfs_zone_activate(bg)) {
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
return 1;
}
need_finish = true;
}
}
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
if (!do_finish || !need_finish)
break;
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 3/7] btrfs: Don't lock data_rwsem if space cache v1 is not used
[not found] <20260112161549.2786827-1-martin@urbackup.org>
2026-01-12 16:17 ` [PATCH 1/7] btrfs: Use percpu refcounting for block groups Martin Raiber
2026-01-12 16:17 ` [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem Martin Raiber
@ 2026-01-12 16:17 ` Martin Raiber
2026-01-12 16:17 ` [PATCH 6/7] btrfs: Introduce fast path for checking if a block group is done Martin Raiber
` (3 subsequent siblings)
6 siblings, 0 replies; 15+ messages in thread
From: Martin Raiber @ 2026-01-12 16:17 UTC (permalink / raw)
To: linux-btrfs; +Cc: Martin Raiber
data_rwsem is only used with space cache v1. Introduce a new
cache state, BTRFS_DC_DISABLED, to easily tell whether space cache
v1 is in use for a block group, and skip locking data_rwsem
when it is not.
This significantly improves performance in the common case.
Signed-off-by: Martin Raiber <martin@urbackup.org>
---
fs/btrfs/block-group.c | 3 +++
fs/btrfs/block-group.h | 3 ++-
fs/btrfs/extent-tree.c | 12 +++++++-----
3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 7569438ccbd5..d9ea62c32587 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2315,6 +2315,9 @@ static struct btrfs_block_group *btrfs_create_block_group(
atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
+ if (!btrfs_test_opt(fs_info, SPACE_CACHE))
+ cache->disk_cache_state = BTRFS_DC_DISABLED;
+
return cache;
}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index d44675f9d601..cf877747fd56 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -22,9 +22,10 @@ struct btrfs_trans_handle;
enum btrfs_disk_cache_state {
BTRFS_DC_WRITTEN,
+ BTRFS_DC_DISABLED,
BTRFS_DC_ERROR,
BTRFS_DC_CLEAR,
- BTRFS_DC_SETUP,
+ BTRFS_DC_SETUP
};
enum btrfs_block_group_size_class {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ce2eef069663..7a5e4efd6cd8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3578,7 +3578,7 @@ enum btrfs_loop_type {
static inline void
btrfs_lock_block_group(struct btrfs_block_group *cache, bool delalloc)
{
- if (delalloc)
+ if (delalloc && cache->disk_cache_state != BTRFS_DC_DISABLED)
down_read(&cache->data_rwsem);
}
@@ -3586,7 +3586,7 @@ static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
bool delalloc)
{
btrfs_get_block_group(cache);
- if (delalloc)
+ if (delalloc && cache->disk_cache_state != BTRFS_DC_DISABLED)
down_read(&cache->data_rwsem);
}
@@ -3612,7 +3612,8 @@ static struct btrfs_block_group *btrfs_lock_cluster(
if (!delalloc)
return used_bg;
- if (down_read_trylock(&used_bg->data_rwsem))
+ if (used_bg->disk_cache_state != BTRFS_DC_DISABLED &&
+ down_read_trylock(&used_bg->data_rwsem))
return used_bg;
spin_unlock(&cluster->refill_lock);
@@ -3624,7 +3625,8 @@ static struct btrfs_block_group *btrfs_lock_cluster(
if (used_bg == cluster->block_group)
return used_bg;
- up_read(&used_bg->data_rwsem);
+ if (used_bg->disk_cache_state != BTRFS_DC_DISABLED)
+ up_read(&used_bg->data_rwsem);
btrfs_put_block_group(used_bg);
}
}
@@ -3632,7 +3634,7 @@ static struct btrfs_block_group *btrfs_lock_cluster(
static inline void
btrfs_release_block_group(struct btrfs_block_group *cache, bool delalloc)
{
- if (delalloc)
+ if (delalloc && cache->disk_cache_state != BTRFS_DC_DISABLED)
up_read(&cache->data_rwsem);
btrfs_put_block_group(cache);
}
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 4/7] btrfs: Use percpu sem for block_group_cache_lock
[not found] <20260112161549.2786827-1-martin@urbackup.org>
` (3 preceding siblings ...)
2026-01-12 16:17 ` [PATCH 6/7] btrfs: Introduce fast path for checking if a block group is done Martin Raiber
@ 2026-01-12 16:17 ` Martin Raiber
2026-01-12 16:17 ` [PATCH 5/7] btrfs: Skip locking percpu semaphores on mount Martin Raiber
2026-01-12 16:17 ` [PATCH 7/7] btrfs: Move block group members frequently accessed together closer Martin Raiber
6 siblings, 0 replies; 15+ messages in thread
From: Martin Raiber @ 2026-01-12 16:17 UTC (permalink / raw)
To: linux-btrfs; +Cc: Martin Raiber
block_group_cache_lock is taken for write only when adding or
removing a block group, which is rare, whereas it is taken for
read frequently, on every find_free_extent via e.g. first_logical_byte.
Convert it to a percpu rw_semaphore to improve performance
significantly.
Signed-off-by: Martin Raiber <martin@urbackup.org>
---
fs/btrfs/block-group.c | 38 +++++++++++++++++++-------------------
fs/btrfs/disk-io.c | 6 +++++-
fs/btrfs/extent-tree.c | 4 ++--
fs/btrfs/fs.h | 2 +-
4 files changed, 27 insertions(+), 23 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index d9ea62c32587..702b8e7a67a4 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -219,13 +219,13 @@ static int btrfs_add_block_group_cache(struct btrfs_block_group *block_group)
ASSERT(block_group->length != 0);
- write_lock(&fs_info->block_group_cache_lock);
+ percpu_down_write(&fs_info->block_group_cache_lock);
exist = rb_find_add_cached(&block_group->cache_node,
&fs_info->block_group_cache_tree, btrfs_bg_start_cmp);
if (exist)
ret = -EEXIST;
- write_unlock(&fs_info->block_group_cache_lock);
+ percpu_up_write(&fs_info->block_group_cache_lock);
return ret;
}
@@ -241,7 +241,7 @@ static struct btrfs_block_group *block_group_cache_tree_search(
struct rb_node *n;
u64 end, start;
- read_lock(&info->block_group_cache_lock);
+ percpu_down_read(&info->block_group_cache_lock);
n = info->block_group_cache_tree.rb_root.rb_node;
while (n) {
@@ -266,7 +266,7 @@ static struct btrfs_block_group *block_group_cache_tree_search(
}
if (ret)
btrfs_get_block_group(ret);
- read_unlock(&info->block_group_cache_lock);
+ percpu_up_read(&info->block_group_cache_lock);
return ret;
}
@@ -295,13 +295,13 @@ struct btrfs_block_group *btrfs_next_block_group(
struct btrfs_fs_info *fs_info = cache->fs_info;
struct rb_node *node;
- read_lock(&fs_info->block_group_cache_lock);
+ percpu_down_read(&fs_info->block_group_cache_lock);
/* If our block group was removed, we need a full search. */
if (RB_EMPTY_NODE(&cache->cache_node)) {
const u64 next_bytenr = cache->start + cache->length;
- read_unlock(&fs_info->block_group_cache_lock);
+ percpu_up_read(&fs_info->block_group_cache_lock);
btrfs_put_block_group(cache);
return btrfs_lookup_first_block_group(fs_info, next_bytenr);
}
@@ -312,7 +312,7 @@ struct btrfs_block_group *btrfs_next_block_group(
btrfs_get_block_group(cache);
} else
cache = NULL;
- read_unlock(&fs_info->block_group_cache_lock);
+ percpu_up_read(&fs_info->block_group_cache_lock);
return cache;
}
@@ -967,10 +967,10 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
cache->cached = BTRFS_CACHE_STARTED;
spin_unlock(&cache->lock);
- write_lock(&fs_info->block_group_cache_lock);
+ percpu_down_write(&fs_info->block_group_cache_lock);
refcount_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
- write_unlock(&fs_info->block_group_cache_lock);
+ percpu_up_write(&fs_info->block_group_cache_lock);
btrfs_get_block_group(cache);
@@ -1160,7 +1160,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- write_lock(&fs_info->block_group_cache_lock);
+ percpu_down_write(&fs_info->block_group_cache_lock);
rb_erase_cached(&block_group->cache_node,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
@@ -1168,7 +1168,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
/* Once for the block groups rbtree */
percpu_ref_kill(&block_group->refs);
- write_unlock(&fs_info->block_group_cache_lock);
+ percpu_up_write(&fs_info->block_group_cache_lock);
percpu_down_write(&block_group->space_info->groups_sem);
/*
@@ -1191,7 +1191,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
if (block_group->cached == BTRFS_CACHE_STARTED)
btrfs_wait_block_group_cache_done(block_group);
- write_lock(&fs_info->block_group_cache_lock);
+ percpu_down_write(&fs_info->block_group_cache_lock);
caching_ctl = btrfs_get_caching_control(block_group);
if (!caching_ctl) {
struct btrfs_caching_control *ctl;
@@ -1206,7 +1206,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
}
if (caching_ctl)
list_del_init(&caching_ctl->list);
- write_unlock(&fs_info->block_group_cache_lock);
+ percpu_up_write(&fs_info->block_group_cache_lock);
if (caching_ctl) {
/* Once for the caching bgs list and once for us. */
@@ -4519,14 +4519,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
}
}
- write_lock(&info->block_group_cache_lock);
+ percpu_down_write(&info->block_group_cache_lock);
while (!list_empty(&info->caching_block_groups)) {
caching_ctl = list_first_entry(&info->caching_block_groups,
struct btrfs_caching_control, list);
list_del(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
}
- write_unlock(&info->block_group_cache_lock);
+ percpu_up_write(&info->block_group_cache_lock);
spin_lock(&info->unused_bgs_lock);
while (!list_empty(&info->unused_bgs)) {
@@ -4556,14 +4556,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
}
spin_unlock(&info->zone_active_bgs_lock);
- write_lock(&info->block_group_cache_lock);
+ percpu_down_write(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree.rb_root)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group,
cache_node);
rb_erase_cached(&block_group->cache_node,
&info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
- write_unlock(&info->block_group_cache_lock);
+ percpu_up_write(&info->block_group_cache_lock);
percpu_down_write(&block_group->space_info->groups_sem);
list_del(&block_group->list);
@@ -4587,9 +4587,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
percpu_ref_kill(&block_group->refs);
ASSERT(percpu_ref_is_zero(&block_group->refs));
- write_lock(&info->block_group_cache_lock);
+ percpu_down_write(&info->block_group_cache_lock);
}
- write_unlock(&info->block_group_cache_lock);
+ percpu_up_write(&info->block_group_cache_lock);
btrfs_release_global_block_rsv(info);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index cecb81d0f9e0..d443cdebb38e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1227,6 +1227,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_close_devices(fs_info->fs_devices);
btrfs_free_compress_wsm(fs_info);
percpu_counter_destroy(&fs_info->stats_read_blocks);
+ percpu_free_rwsem(&fs_info->block_group_cache_lock);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
percpu_counter_destroy(&fs_info->ordered_bytes);
@@ -2804,7 +2805,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_init_async_reclaim_work(fs_info);
btrfs_init_extent_map_shrinker_work(fs_info);
- rwlock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
btrfs_extent_io_tree_init(fs_info, &fs_info->excluded_extents,
@@ -2861,6 +2861,10 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
+ ret = percpu_init_rwsem(&fs_info->block_group_cache_lock);
+ if (ret)
+ return ret;
+
ret = percpu_counter_init(&fs_info->ordered_bytes, 0, GFP_KERNEL);
if (ret)
return ret;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7a5e4efd6cd8..5ec8b1cfc317 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2575,7 +2575,7 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
struct rb_node *leftmost;
u64 bytenr = 0;
- read_lock(&fs_info->block_group_cache_lock);
+ percpu_down_read(&fs_info->block_group_cache_lock);
/* Get the block group with the lowest logical start address. */
leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
if (leftmost) {
@@ -2584,7 +2584,7 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
bytenr = bg->start;
}
- read_unlock(&fs_info->block_group_cache_lock);
+ percpu_up_read(&fs_info->block_group_cache_lock);
return bytenr;
}
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 859551cf9bee..353bbc5ad49c 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -479,7 +479,7 @@ struct btrfs_fs_info {
struct radix_tree_root fs_roots_radix;
/* Block group cache stuff */
- rwlock_t block_group_cache_lock;
+ struct percpu_rw_semaphore block_group_cache_lock;
struct rb_root_cached block_group_cache_tree;
/* Keep track of unallocated space */
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 6/7] btrfs: Introduce fast path for checking if a block group is done
[not found] <20260112161549.2786827-1-martin@urbackup.org>
` (2 preceding siblings ...)
2026-01-12 16:17 ` [PATCH 3/7] btrfs: Don't lock data_rwsem if space cache v1 is not used Martin Raiber
@ 2026-01-12 16:17 ` Martin Raiber
2026-01-13 3:48 ` Sun Yangkai
2026-01-12 16:17 ` [PATCH 4/7] btrfs: Use percpu sem for block_group_cache_lock Martin Raiber
` (2 subsequent siblings)
6 siblings, 1 reply; 15+ messages in thread
From: Martin Raiber @ 2026-01-12 16:17 UTC (permalink / raw)
To: linux-btrfs; +Cc: Martin Raiber
A block group cannot switch away from BTRFS_CACHE_FINISHED
once it enters that state. Therefore we can introduce a fast
path that checks for the likely case that the block group is
already cached, avoiding a full memory barrier in the common
case.
Signed-off-by: Martin Raiber <martin@urbackup.org>
---
fs/btrfs/block-group.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index cf877747fd56..73bdf7091d49 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -380,6 +380,9 @@ static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
static inline int btrfs_block_group_done(const struct btrfs_block_group *cache)
{
+ if (likely(cache->cached == BTRFS_CACHE_FINISHED))
+ return 1;
+
smp_mb();
return cache->cached == BTRFS_CACHE_FINISHED ||
cache->cached == BTRFS_CACHE_ERROR;
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 5/7] btrfs: Skip locking percpu semaphores on mount
[not found] <20260112161549.2786827-1-martin@urbackup.org>
` (4 preceding siblings ...)
2026-01-12 16:17 ` [PATCH 4/7] btrfs: Use percpu sem for block_group_cache_lock Martin Raiber
@ 2026-01-12 16:17 ` Martin Raiber
2026-01-12 16:17 ` [PATCH 7/7] btrfs: Move block group members frequently accessed together closer Martin Raiber
6 siblings, 0 replies; 15+ messages in thread
From: Martin Raiber @ 2026-01-12 16:17 UTC (permalink / raw)
To: linux-btrfs; +Cc: Martin Raiber
During mount we add block groups rapidly, taking the percpu
semaphores for each block group addition. This is quite slow.
Since mount/open_ctree is currently single-threaded we
can simply not lock in this case, removing the percpu
write lock overhead.
Signed-off-by: Martin Raiber <martin@urbackup.org>
---
fs/btrfs/block-group.c | 20 +++++++++++---------
fs/btrfs/space-info.c | 8 +++++---
fs/btrfs/space-info.h | 2 +-
fs/btrfs/zoned.c | 2 +-
4 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 702b8e7a67a4..85038c33f3ac 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -211,7 +211,7 @@ static int btrfs_bg_start_cmp(const struct rb_node *new,
/*
* This adds the block group to the fs_info rb tree for the block group cache
*/
-static int btrfs_add_block_group_cache(struct btrfs_block_group *block_group)
+static int btrfs_add_block_group_cache(struct btrfs_block_group *block_group, int lock)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct rb_node *exist;
@@ -219,13 +219,15 @@ static int btrfs_add_block_group_cache(struct btrfs_block_group *block_group)
ASSERT(block_group->length != 0);
- percpu_down_write(&fs_info->block_group_cache_lock);
+ if (lock)
+ percpu_down_write(&fs_info->block_group_cache_lock);
exist = rb_find_add_cached(&block_group->cache_node,
&fs_info->block_group_cache_tree, btrfs_bg_start_cmp);
if (exist)
ret = -EEXIST;
- percpu_up_write(&fs_info->block_group_cache_lock);
+ if (lock)
+ percpu_up_write(&fs_info->block_group_cache_lock);
return ret;
}
@@ -2467,14 +2469,14 @@ static int read_one_block_group(struct btrfs_fs_info *info,
goto error;
}
- ret = btrfs_add_block_group_cache(cache);
+ ret = btrfs_add_block_group_cache(cache, 0);
if (ret) {
btrfs_remove_free_space_cache(cache);
goto error;
}
trace_btrfs_add_block_group(info, cache, 0);
- btrfs_add_bg_to_space_info(info, cache);
+ btrfs_add_bg_to_space_info(info, cache, 0);
set_avail_alloc_bits(info, cache->flags);
if (btrfs_chunk_writeable(info, cache->start)) {
@@ -2518,7 +2520,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
bg->used = map->chunk_len;
bg->flags = map->type;
bg->space_info = btrfs_find_space_info(fs_info, bg->flags);
- ret = btrfs_add_block_group_cache(bg);
+ ret = btrfs_add_block_group_cache(bg, 1);
/*
* We may have some valid block group cache added already, in
* that case we skip to the next one.
@@ -2535,7 +2537,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
break;
}
- btrfs_add_bg_to_space_info(fs_info, bg);
+ btrfs_add_bg_to_space_info(fs_info, bg, 1);
set_avail_alloc_bits(fs_info, bg->flags);
}
@@ -2949,7 +2951,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
cache->space_info = space_info;
ASSERT(cache->space_info);
- ret = btrfs_add_block_group_cache(cache);
+ ret = btrfs_add_block_group_cache(cache, 1);
if (ret) {
btrfs_remove_free_space_cache(cache);
btrfs_put_block_group(cache);
@@ -2961,7 +2963,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
* the rbtree, update the space info's counters.
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
- btrfs_add_bg_to_space_info(fs_info, cache);
+ btrfs_add_bg_to_space_info(fs_info, cache, 1);
btrfs_update_global_block_rsv(fs_info);
#ifdef CONFIG_BTRFS_DEBUG
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index ddedeccbdade..b2605c9d79b4 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -371,7 +371,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
}
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
- struct btrfs_block_group *block_group)
+ struct btrfs_block_group *block_group, int lock)
{
struct btrfs_space_info *space_info = block_group->space_info;
int factor, index;
@@ -393,9 +393,11 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
block_group->space_info = space_info;
index = btrfs_bg_flags_to_raid_index(block_group->flags);
- percpu_down_write(&space_info->groups_sem);
+ if (lock)
+ percpu_down_write(&space_info->groups_sem);
list_add_tail(&block_group->list, &space_info->block_groups[index]);
- percpu_up_write(&space_info->groups_sem);
+ if (lock)
+ percpu_up_write(&space_info->groups_sem);
}
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index f99624069391..7baa335f63fb 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -271,7 +271,7 @@ static inline u64 btrfs_space_info_used(const struct btrfs_space_info *s_info,
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
- struct btrfs_block_group *block_group);
+ struct btrfs_block_group *block_group, int lock);
void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
u64 chunk_size);
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index da92b0d38a1b..46938385ad15 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2607,7 +2607,7 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
if (reloc_sinfo->block_group_kobjs[index] == NULL)
btrfs_sysfs_add_block_group_type(bg);
- btrfs_add_bg_to_space_info(fs_info, bg);
+ btrfs_add_bg_to_space_info(fs_info, bg, 1);
}
fs_info->data_reloc_bg = bg->start;
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 7/7] btrfs: Move block group members frequently accessed together closer
[not found] <20260112161549.2786827-1-martin@urbackup.org>
` (5 preceding siblings ...)
2026-01-12 16:17 ` [PATCH 5/7] btrfs: Skip locking percpu semaphores on mount Martin Raiber
@ 2026-01-12 16:17 ` Martin Raiber
6 siblings, 0 replies; 15+ messages in thread
From: Martin Raiber @ 2026-01-12 16:17 UTC (permalink / raw)
To: linux-btrfs; +Cc: Martin Raiber
The ro, cached and size_class members of btrfs_block_group are
frequently used together by find_free_extent. Move them into the same
cache line at the beginning of the struct.
Signed-off-by: Martin Raiber <martin@urbackup.org>
---
fs/btrfs/block-group.h | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 73bdf7091d49..a356b35af61a 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -120,6 +120,12 @@ struct btrfs_block_group {
struct btrfs_fs_info *fs_info;
struct btrfs_inode *inode;
spinlock_t lock;
+
+ /* Frequently accessed by find_free_extent members */
+ unsigned int ro;
+ int cached;
+ enum btrfs_block_group_size_class size_class;
+
u64 start;
u64 length;
u64 pinned;
@@ -160,12 +166,9 @@ struct btrfs_block_group {
unsigned long full_stripe_len;
unsigned long runtime_flags;
- unsigned int ro;
-
int disk_cache_state;
/* Cache tracking stuff */
- int cached;
struct btrfs_caching_control *caching_ctl;
struct btrfs_space_info *space_info;
@@ -270,7 +273,6 @@ struct btrfs_block_group {
struct list_head active_bg_list;
struct work_struct zone_finish_work;
struct extent_buffer *last_eb;
- enum btrfs_block_group_size_class size_class;
u64 reclaim_mark;
};
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH v2 2/7] btrfs: Use percpu semaphore for space info groups_sem
2026-01-12 16:17 ` [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem Martin Raiber
@ 2026-01-12 18:58 ` Martin Raiber
2026-01-14 11:06 ` Johannes Thumshirn
2026-01-12 22:58 ` [PATCH " Boris Burkov
1 sibling, 1 reply; 15+ messages in thread
From: Martin Raiber @ 2026-01-12 18:58 UTC (permalink / raw)
To: linux-btrfs; +Cc: Martin Raiber
The groups_sem is locked for write mostly when adding
or removing block groups, whereas it is locked for read
constantly by multiple CPUs.
Change it into a percpu semaphore to significantly
increase the performance of find_free_extent.
Signed-off-by: Martin Raiber <martin@urbackup.org>
---
fs/btrfs/extent-tree.c | 8 ++++----
fs/btrfs/ioctl.c | 8 ++++----
fs/btrfs/space-info.c | 29 +++++++++++++++++++----------
fs/btrfs/space-info.h | 2 +-
fs/btrfs/sysfs.c | 9 +++++----
fs/btrfs/zoned.c | 11 +++++------
6 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1dcd69fe97ed..ce2eef069663 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4442,7 +4442,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
block_group->space_info == space_info &&
block_group->cached != BTRFS_CACHE_NO) {
- down_read(&space_info->groups_sem);
+ percpu_down_read(&space_info->groups_sem);
if (list_empty(&block_group->list) ||
block_group->ro) {
/*
@@ -4452,7 +4452,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
* valid
*/
btrfs_put_block_group(block_group);
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
} else {
ffe_ctl->index = btrfs_bg_flags_to_raid_index(
block_group->flags);
@@ -4471,7 +4471,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
ffe_ctl->index == 0)
full_search = true;
- down_read(&space_info->groups_sem);
+ percpu_down_read(&space_info->groups_sem);
list_for_each_entry(block_group,
&space_info->block_groups[ffe_ctl->index], list) {
struct btrfs_block_group *bg_ret;
@@ -4609,7 +4609,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
cond_resched();
}
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, space_info,
full_search);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d9e7dd317670..73ff0efc0381 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2940,12 +2940,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
if (!info)
continue;
- down_read(&info->groups_sem);
+ percpu_down_read(&info->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
if (!list_empty(&info->block_groups[c]))
slot_count++;
}
- up_read(&info->groups_sem);
+ percpu_up_read(&info->groups_sem);
}
/*
@@ -2992,7 +2992,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
if (!info)
continue;
- down_read(&info->groups_sem);
+ percpu_down_read(&info->groups_sem);
for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
if (!list_empty(&info->block_groups[c])) {
get_block_group_info(&info->block_groups[c],
@@ -3005,7 +3005,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
if (!slot_count)
break;
}
- up_read(&info->groups_sem);
+ percpu_up_read(&info->groups_sem);
}
/*
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 857e4fd2c77e..90538a6ba66b 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -234,13 +234,14 @@ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
WRITE_ONCE(space_info->chunk_size, chunk_size);
}
-static void init_space_info(struct btrfs_fs_info *info,
+static int init_space_info(struct btrfs_fs_info *info,
struct btrfs_space_info *space_info, u64 flags)
{
space_info->fs_info = info;
for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&space_info->block_groups[i]);
- init_rwsem(&space_info->groups_sem);
+ if (percpu_init_rwsem(&space_info->groups_sem))
+ return -ENOMEM;
spin_lock_init(&space_info->lock);
space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
@@ -253,6 +254,8 @@ static void init_space_info(struct btrfs_fs_info *info,
if (btrfs_is_zoned(info))
space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
+
+ return 0;
}
static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flags,
@@ -270,7 +273,10 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
if (!sub_group)
return -ENOMEM;
- init_space_info(fs_info, sub_group, flags);
+ if (init_space_info(fs_info, sub_group, flags)) {
+ kfree(sub_group);
+ return -ENOMEM;
+ }
parent->sub_group[index] = sub_group;
sub_group->parent = parent;
sub_group->subgroup_id = id;
@@ -293,7 +299,10 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
if (!space_info)
return -ENOMEM;
- init_space_info(info, space_info, flags);
+ if (init_space_info(info, space_info, flags)) {
+ kfree(space_info);
+ return -ENOMEM;
+ }
if (btrfs_is_zoned(info)) {
if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -384,9 +393,9 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
block_group->space_info = space_info;
index = btrfs_bg_flags_to_raid_index(block_group->flags);
- down_write(&space_info->groups_sem);
+ percpu_down_write(&space_info->groups_sem);
list_add_tail(&block_group->list, &space_info->block_groups[index]);
- up_write(&space_info->groups_sem);
+ percpu_up_write(&space_info->groups_sem);
}
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
@@ -650,7 +659,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
if (!dump_block_groups)
return;
- down_read(&info->groups_sem);
+ percpu_down_read(&info->groups_sem);
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
u64 avail;
@@ -670,7 +679,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
}
if (++index < BTRFS_NR_RAID_TYPES)
goto again;
- up_read(&info->groups_sem);
+ percpu_up_read(&info->groups_sem);
btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
}
@@ -2095,7 +2104,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
thresh_pct = btrfs_calc_reclaim_threshold(space_info);
spin_unlock(&space_info->lock);
- down_read(&space_info->groups_sem);
+ percpu_down_read(&space_info->groups_sem);
again:
list_for_each_entry(bg, &space_info->block_groups[raid], list) {
u64 thresh;
@@ -2127,7 +2136,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
goto again;
}
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
}
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 0703f24b23f7..f99624069391 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -175,7 +175,7 @@ struct btrfs_space_info {
*/
u64 tickets_id;
- struct rw_semaphore groups_sem;
+ struct percpu_rw_semaphore groups_sem;
/* for block groups in our same type */
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index ebd6d1d6778b..ccec9eb1fa4f 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -701,14 +701,14 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
u64 val = 0;
- down_read(&sinfo->groups_sem);
+ percpu_down_read(&sinfo->groups_sem);
list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
val += block_group->length;
else
val += block_group->used;
}
- up_read(&sinfo->groups_sem);
+ percpu_up_read(&sinfo->groups_sem);
return sysfs_emit(buf, "%llu\n", val);
}
@@ -816,7 +816,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
u32 large = 0;
for (int i = 0; i < BTRFS_NR_RAID_TYPES; ++i) {
- down_read(&sinfo->groups_sem);
+ percpu_down_read(&sinfo->groups_sem);
list_for_each_entry(bg, &sinfo->block_groups[i], list) {
if (!btrfs_block_group_should_use_size_class(bg))
continue;
@@ -835,7 +835,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
break;
}
}
- up_read(&sinfo->groups_sem);
+ percpu_up_read(&sinfo->groups_sem);
}
return sysfs_emit(buf, "none %u\n"
"small %u\n"
@@ -1046,6 +1046,7 @@ ATTRIBUTE_GROUPS(space_info);
static void space_info_release(struct kobject *kobj)
{
struct btrfs_space_info *sinfo = to_space_info(kobj);
+ percpu_free_rwsem(&sinfo->groups_sem);
kfree(sinfo);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 2e861eef5cd8..da92b0d38a1b 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2588,12 +2588,11 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
"reloc_sinfo->subgroup_id=%d", reloc_sinfo->subgroup_id);
factor = btrfs_bg_type_to_factor(bg->flags);
- down_write(&space_info->groups_sem);
+ percpu_down_write(&space_info->groups_sem);
list_del_init(&bg->list);
/* We can assume this as we choose the second empty one. */
ASSERT(!list_empty(&space_info->block_groups[index]));
- up_write(&space_info->groups_sem);
-
+ percpu_up_write(&space_info->groups_sem);
spin_lock(&space_info->lock);
space_info->total_bytes -= bg->length;
space_info->disk_total -= bg->length * factor;
@@ -2771,7 +2770,7 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
int ret;
bool need_finish = false;
- down_read(&space_info->groups_sem);
+ percpu_down_read(&space_info->groups_sem);
for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) {
list_for_each_entry(bg, &space_info->block_groups[index],
list) {
@@ -2786,14 +2785,14 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
spin_unlock(&bg->lock);
if (btrfs_zone_activate(bg)) {
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
return 1;
}
need_finish = true;
}
}
- up_read(&space_info->groups_sem);
+ percpu_up_read(&space_info->groups_sem);
if (!do_finish || !need_finish)
break;
--
2.39.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 1/7] btrfs: Use percpu refcounting for block groups
2026-01-12 16:17 ` [PATCH 1/7] btrfs: Use percpu refcounting for block groups Martin Raiber
@ 2026-01-12 22:32 ` Boris Burkov
2026-01-14 6:06 ` kernel test robot
1 sibling, 0 replies; 15+ messages in thread
From: Boris Burkov @ 2026-01-12 22:32 UTC (permalink / raw)
To: Martin Raiber; +Cc: linux-btrfs
On Mon, Jan 12, 2026 at 04:17:16PM +0000, Martin Raiber wrote:
> Use a percpu counter to keep track of the block group refs.
> This prevents CPU synchronization completely as long as the main reference
> is not freed via btrfs_remove_block_group, improving performance of
> btrfs_put_block_group, btrfs_get_block_group significantly.
Besides the potential mixup with patch 2, this looks like a nice
improvement to me, since we adhere to the pattern of having a "main"
refcount for the rb tree entry.
My only other complaint is that this seems to lose the helpful
refcount_t warning feature. But I think I can live with that.
>
> Signed-off-by: Martin Raiber <martin@urbackup.org>
> ---
> fs/btrfs/block-group.c | 111 +++++++++++++++++++++++------------------
> fs/btrfs/block-group.h | 2 +-
> 2 files changed, 63 insertions(+), 50 deletions(-)
>
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index a1119f06b6d1..7569438ccbd5 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -153,37 +153,44 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
>
> void btrfs_get_block_group(struct btrfs_block_group *cache)
> {
> - refcount_inc(&cache->refs);
> + percpu_ref_get(&cache->refs);
> }
>
> -void btrfs_put_block_group(struct btrfs_block_group *cache)
> +static void btrfs_free_block_group(struct percpu_ref *ref)
> {
> - if (refcount_dec_and_test(&cache->refs)) {
> - WARN_ON(cache->pinned > 0);
> - /*
> - * If there was a failure to cleanup a log tree, very likely due
> - * to an IO failure on a writeback attempt of one or more of its
> - * extent buffers, we could not do proper (and cheap) unaccounting
> - * of their reserved space, so don't warn on reserved > 0 in that
> - * case.
> - */
> - if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
> - !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
> - WARN_ON(cache->reserved > 0);
> + struct btrfs_block_group *cache =
> + container_of(ref, struct btrfs_block_group, refs);
I think for most new code we call these 'block_group' rather than
'cache', so I think this is a good opportunity to update it.
>
> - /*
> - * A block_group shouldn't be on the discard_list anymore.
> - * Remove the block_group from the discard_list to prevent us
> - * from causing a panic due to NULL pointer dereference.
> - */
> - if (WARN_ON(!list_empty(&cache->discard_list)))
> - btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
> - cache);
> + WARN_ON(cache->pinned > 0);
> + /*
> + * If there was a failure to cleanup a log tree, very likely due
> + * to an IO failure on a writeback attempt of one or more of its
> + * extent buffers, we could not do proper (and cheap) unaccounting
> + * of their reserved space, so don't warn on reserved > 0 in that
> + * case.
> + */
> + if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
> + !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
> + WARN_ON(cache->reserved > 0);
>
> - kfree(cache->free_space_ctl);
> - btrfs_free_chunk_map(cache->physical_map);
> - kfree(cache);
> - }
> + /*
> + * A block_group shouldn't be on the discard_list anymore.
> + * Remove the block_group from the discard_list to prevent us
> + * from causing a panic due to NULL pointer dereference.
> + */
> + if (WARN_ON(!list_empty(&cache->discard_list)))
> + btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
> + cache);
> +
> + percpu_ref_exit(&cache->refs);
> + kfree(cache->free_space_ctl);
> + btrfs_free_chunk_map(cache->physical_map);
> + kfree(cache);
> +}
> +
> +void btrfs_put_block_group(struct btrfs_block_group *cache)
> +{
> + percpu_ref_put(&cache->refs);
> }
>
> static int btrfs_bg_start_cmp(const struct rb_node *new,
> @@ -406,8 +413,8 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg)
> * on the groups' semaphore is held and decremented after releasing
> * the read access on that semaphore and creating the ordered extent.
> */
> - down_write(&space_info->groups_sem);
> - up_write(&space_info->groups_sem);
> + percpu_down_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
Did this sneak out of patch 2 into this patch? (several other instances
in this patch that I won't note)
>
> wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
> }
> @@ -1012,7 +1019,7 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
> struct btrfs_space_info *sinfo;
>
> list_for_each_entry_rcu(sinfo, head, list) {
> - down_read(&sinfo->groups_sem);
> + percpu_down_read(&sinfo->groups_sem);
> if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
> found_raid56 = true;
> if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
> @@ -1021,7 +1028,7 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
> found_raid1c34 = true;
> if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C4]))
> found_raid1c34 = true;
> - up_read(&sinfo->groups_sem);
> + percpu_up_read(&sinfo->groups_sem);
> }
> if (!found_raid56)
> btrfs_clear_fs_incompat(fs_info, RAID56);
> @@ -1159,11 +1166,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
> RB_CLEAR_NODE(&block_group->cache_node);
>
> /* Once for the block groups rbtree */
> - btrfs_put_block_group(block_group);
> + percpu_ref_kill(&block_group->refs);
>
> write_unlock(&fs_info->block_group_cache_lock);
>
> - down_write(&block_group->space_info->groups_sem);
> + percpu_down_write(&block_group->space_info->groups_sem);
> /*
> * we must use list_del_init so people can check to see if they
> * are still on the list after taking the semaphore
> @@ -1174,7 +1181,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
> block_group->space_info->block_group_kobjs[index] = NULL;
> clear_avail_alloc_bits(fs_info, block_group->flags);
> }
> - up_write(&block_group->space_info->groups_sem);
> + percpu_up_write(&block_group->space_info->groups_sem);
> clear_incompat_bg_bits(fs_info, block_group->flags);
> if (kobj) {
> kobject_del(kobj);
> @@ -1544,7 +1551,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
> btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
>
> /* Don't want to race with allocators so take the groups_sem */
> - down_write(&space_info->groups_sem);
> + percpu_down_write(&space_info->groups_sem);
>
> /*
> * Async discard moves the final block group discard to be prior
> @@ -1554,7 +1561,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
> if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
> !btrfs_is_free_space_trimmed(block_group)) {
> trace_btrfs_skip_unused_block_group(block_group);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> /* Requeue if we failed because of async discard */
> btrfs_discard_queue_work(&fs_info->discard_ctl,
> block_group);
> @@ -1581,7 +1588,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
> trace_btrfs_skip_unused_block_group(block_group);
> spin_unlock(&block_group->lock);
> spin_unlock(&space_info->lock);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> goto next;
> }
>
> @@ -1618,7 +1625,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
> trace_btrfs_skip_unused_block_group(block_group);
> spin_unlock(&block_group->lock);
> spin_unlock(&space_info->lock);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> goto next;
> }
>
> @@ -1627,7 +1634,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
>
> /* We don't want to force the issue, only flip if it's ok. */
> ret = inc_block_group_ro(block_group, 0);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> if (ret < 0) {
> ret = 0;
> goto next;
> @@ -1882,7 +1889,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
> spin_unlock(&fs_info->unused_bgs_lock);
>
> /* Don't race with allocators so take the groups_sem */
> - down_write(&space_info->groups_sem);
> + percpu_down_write(&space_info->groups_sem);
>
> spin_lock(&space_info->lock);
> spin_lock(&bg->lock);
> @@ -1895,7 +1902,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
> */
> spin_unlock(&bg->lock);
> spin_unlock(&space_info->lock);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> goto next;
> }
> if (bg->used == 0) {
> @@ -1914,7 +1921,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
> btrfs_mark_bg_unused(bg);
> spin_unlock(&bg->lock);
> spin_unlock(&space_info->lock);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> goto next;
>
> }
> @@ -1931,7 +1938,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
> if (!should_reclaim_block_group(bg, bg->length)) {
> spin_unlock(&bg->lock);
> spin_unlock(&space_info->lock);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> goto next;
> }
>
> @@ -1947,12 +1954,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
> * never gets back to read-write to let us reclaim again.
> */
> if (btrfs_need_cleaner_sleep(fs_info)) {
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> goto next;
> }
>
> ret = inc_block_group_ro(bg, 0);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> if (ret < 0)
> goto next;
>
> @@ -2288,7 +2295,12 @@ static struct btrfs_block_group *btrfs_create_block_group(
>
> cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
>
> - refcount_set(&cache->refs, 1);
> + if (percpu_ref_init(&cache->refs, btrfs_free_block_group,
> + 0, GFP_NOFS)) {
> + kfree(cache->free_space_ctl);
> + kfree(cache);
> + return NULL;
> + }
> spin_lock_init(&cache->lock);
> init_rwsem(&cache->data_rwsem);
> INIT_LIST_HEAD(&cache->list);
> @@ -4550,9 +4562,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
> RB_CLEAR_NODE(&block_group->cache_node);
> write_unlock(&info->block_group_cache_lock);
>
> - down_write(&block_group->space_info->groups_sem);
> + percpu_down_write(&block_group->space_info->groups_sem);
> list_del(&block_group->list);
> - up_write(&block_group->space_info->groups_sem);
> + percpu_up_write(&block_group->space_info->groups_sem);
>
> /*
> * We haven't cached this block group, which means we could
> @@ -4567,9 +4579,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
> ASSERT(list_empty(&block_group->dirty_list));
> ASSERT(list_empty(&block_group->io_list));
> ASSERT(list_empty(&block_group->bg_list));
> - ASSERT(refcount_read(&block_group->refs) == 1);
> + ASSERT(!percpu_ref_is_zero(&block_group->refs));
> ASSERT(block_group->swap_extents == 0);
> - btrfs_put_block_group(block_group);
> + percpu_ref_kill(&block_group->refs);
> + ASSERT(percpu_ref_is_zero(&block_group->refs));
>
> write_lock(&info->block_group_cache_lock);
> }
> diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
> index 5f933455118c..d44675f9d601 100644
> --- a/fs/btrfs/block-group.h
> +++ b/fs/btrfs/block-group.h
> @@ -178,7 +178,7 @@ struct btrfs_block_group {
> /* For block groups in the same raid type */
> struct list_head list;
>
> - refcount_t refs;
> + struct percpu_ref refs;
>
> /*
> * List of struct btrfs_free_clusters for this block group.
> --
> 2.39.5
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem
2026-01-12 16:17 ` [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem Martin Raiber
2026-01-12 18:58 ` [PATCH v2 " Martin Raiber
@ 2026-01-12 22:58 ` Boris Burkov
2026-01-13 19:15 ` Martin Raiber
1 sibling, 1 reply; 15+ messages in thread
From: Boris Burkov @ 2026-01-12 22:58 UTC (permalink / raw)
To: Martin Raiber; +Cc: linux-btrfs
On Mon, Jan 12, 2026 at 04:17:17PM +0000, Martin Raiber wrote:
> Groups_sem is locked for write mostly only when adding
> or removing block groups, whereas it is locked for read
> constantly by multiple CPUs.
> Change it into a percpu semaphore to significantly
> increase the performance of find_free_extent.
This argument makes sense to me, and I don't think the proposal is wrong
or anything.
However, I am concerned about the low amount of evidence and detail for
a major change like this.
Can you share your benchmarking results?
What, if any, changes in fairness behavior would we expect going from
rwsem to percpu-rwsem?
Can you characterize the effect on adding / removing block groups? How
long does it currently take? How long does it take once you make it wait
for an rcu grace period? That will affect ENOSPC flushing which can be
blocking some task, so drastically hurting that performance could be bad.
I suspect it will be fine, though.
Thanks,
Boris
>
> Signed-off-by: Martin Raiber <martin@urbackup.org>
> ---
> fs/btrfs/extent-tree.c | 8 ++++----
> fs/btrfs/ioctl.c | 8 ++++----
> fs/btrfs/space-info.c | 29 +++++++++++++++++++----------
> fs/btrfs/space-info.h | 2 +-
> fs/btrfs/sysfs.c | 9 +++++----
> fs/btrfs/zoned.c | 11 +++++------
> 6 files changed, 38 insertions(+), 29 deletions(-)
>
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 1dcd69fe97ed..ce2eef069663 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -4442,7 +4442,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
> if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
> block_group->space_info == space_info &&
> block_group->cached != BTRFS_CACHE_NO) {
> - down_read(&space_info->groups_sem);
> + percpu_down_read(&space_info->groups_sem);
> if (list_empty(&block_group->list) ||
> block_group->ro) {
> /*
> @@ -4452,7 +4452,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
> * valid
> */
> btrfs_put_block_group(block_group);
> - up_read(&space_info->groups_sem);
> + percpu_up_read(&space_info->groups_sem);
> } else {
> ffe_ctl->index = btrfs_bg_flags_to_raid_index(
> block_group->flags);
> @@ -4471,7 +4471,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
> if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
> ffe_ctl->index == 0)
> full_search = true;
> - down_read(&space_info->groups_sem);
> + percpu_down_read(&space_info->groups_sem);
> list_for_each_entry(block_group,
> &space_info->block_groups[ffe_ctl->index], list) {
> struct btrfs_block_group *bg_ret;
> @@ -4609,7 +4609,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
> release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
> cond_resched();
> }
> - up_read(&space_info->groups_sem);
> + percpu_up_read(&space_info->groups_sem);
>
> ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, space_info,
> full_search);
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index d9e7dd317670..73ff0efc0381 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -2940,12 +2940,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
> if (!info)
> continue;
>
> - down_read(&info->groups_sem);
> + percpu_down_read(&info->groups_sem);
> for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
> if (!list_empty(&info->block_groups[c]))
> slot_count++;
> }
> - up_read(&info->groups_sem);
> + percpu_up_read(&info->groups_sem);
> }
>
> /*
> @@ -2992,7 +2992,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
>
> if (!info)
> continue;
> - down_read(&info->groups_sem);
> + percpu_down_read(&info->groups_sem);
> for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
> if (!list_empty(&info->block_groups[c])) {
> get_block_group_info(&info->block_groups[c],
> @@ -3005,7 +3005,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
> if (!slot_count)
> break;
> }
> - up_read(&info->groups_sem);
> + percpu_up_read(&info->groups_sem);
> }
>
> /*
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index 857e4fd2c77e..ddedeccbdade 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -234,13 +234,14 @@ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
> WRITE_ONCE(space_info->chunk_size, chunk_size);
> }
>
> -static void init_space_info(struct btrfs_fs_info *info,
> +static int init_space_info(struct btrfs_fs_info *info,
> struct btrfs_space_info *space_info, u64 flags)
> {
> space_info->fs_info = info;
> for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++)
> INIT_LIST_HEAD(&space_info->block_groups[i]);
> - init_rwsem(&space_info->groups_sem);
> + if (!percpu_init_rwsem(&space_info->groups_sem))
> + return -ENOMEM;
> spin_lock_init(&space_info->lock);
> space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
> space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
> @@ -253,6 +254,8 @@ static void init_space_info(struct btrfs_fs_info *info,
>
> if (btrfs_is_zoned(info))
> space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
> +
> + return 0;
> }
>
> static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flags,
> @@ -270,7 +273,10 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
> if (!sub_group)
> return -ENOMEM;
>
> - init_space_info(fs_info, sub_group, flags);
> + if (init_space_info(fs_info, sub_group, flags)) {
> + kfree(sub_group);
> + return -ENOMEM;
> + }
> parent->sub_group[index] = sub_group;
> sub_group->parent = parent;
> sub_group->subgroup_id = id;
> @@ -293,7 +299,10 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
> if (!space_info)
> return -ENOMEM;
>
> - init_space_info(info, space_info, flags);
> + if (init_space_info(info, space_info, flags)) {
> + kfree(space_info);
> + return -ENOMEM;
> + }
>
> if (btrfs_is_zoned(info)) {
> if (flags & BTRFS_BLOCK_GROUP_DATA)
> @@ -384,9 +393,9 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
> block_group->space_info = space_info;
>
> index = btrfs_bg_flags_to_raid_index(block_group->flags);
> - down_write(&space_info->groups_sem);
> + percpu_down_write(&space_info->groups_sem);
> list_add_tail(&block_group->list, &space_info->block_groups[index]);
> - up_write(&space_info->groups_sem);
> + percpu_up_write(&space_info->groups_sem);
> }
>
> struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
> @@ -650,7 +659,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
> if (!dump_block_groups)
> return;
>
> - down_read(&info->groups_sem);
> + percpu_down_read(&info->groups_sem);
> again:
> list_for_each_entry(cache, &info->block_groups[index], list) {
> u64 avail;
> @@ -670,7 +679,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
> }
> if (++index < BTRFS_NR_RAID_TYPES)
> goto again;
> - up_read(&info->groups_sem);
> + percpu_up_read(&info->groups_sem);
>
> btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
> }
> @@ -2095,7 +2104,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
> thresh_pct = btrfs_calc_reclaim_threshold(space_info);
> spin_unlock(&space_info->lock);
>
> - down_read(&space_info->groups_sem);
> + percpu_down_read(&space_info->groups_sem);
> again:
> list_for_each_entry(bg, &space_info->block_groups[raid], list) {
> u64 thresh;
> @@ -2127,7 +2136,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
> goto again;
> }
>
> - up_read(&space_info->groups_sem);
> + percpu_up_read(&space_info->groups_sem);
> }
>
> void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
> diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
> index 0703f24b23f7..f99624069391 100644
> --- a/fs/btrfs/space-info.h
> +++ b/fs/btrfs/space-info.h
> @@ -175,7 +175,7 @@ struct btrfs_space_info {
> */
> u64 tickets_id;
>
> - struct rw_semaphore groups_sem;
> + struct percpu_rw_semaphore groups_sem;
> /* for block groups in our same type */
> struct list_head block_groups[BTRFS_NR_RAID_TYPES];
>
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index ebd6d1d6778b..ccec9eb1fa4f 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -701,14 +701,14 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
> int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
> u64 val = 0;
>
> - down_read(&sinfo->groups_sem);
> + percpu_down_read(&sinfo->groups_sem);
> list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
> if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
> val += block_group->length;
> else
> val += block_group->used;
> }
> - up_read(&sinfo->groups_sem);
> + percpu_up_read(&sinfo->groups_sem);
> return sysfs_emit(buf, "%llu\n", val);
> }
>
> @@ -816,7 +816,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
> u32 large = 0;
>
> for (int i = 0; i < BTRFS_NR_RAID_TYPES; ++i) {
> - down_read(&sinfo->groups_sem);
> + percpu_down_read(&sinfo->groups_sem);
> list_for_each_entry(bg, &sinfo->block_groups[i], list) {
> if (!btrfs_block_group_should_use_size_class(bg))
> continue;
> @@ -835,7 +835,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
> break;
> }
> }
> - up_read(&sinfo->groups_sem);
> + percpu_up_read(&sinfo->groups_sem);
> }
> return sysfs_emit(buf, "none %u\n"
> "small %u\n"
> @@ -1046,6 +1046,7 @@ ATTRIBUTE_GROUPS(space_info);
> static void space_info_release(struct kobject *kobj)
> {
> struct btrfs_space_info *sinfo = to_space_info(kobj);
> + percpu_free_rwsem(&sinfo->groups_sem);
> kfree(sinfo);
> }
>
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 2e861eef5cd8..da92b0d38a1b 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -2588,12 +2588,11 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
> "reloc_sinfo->subgroup_id=%d", reloc_sinfo->subgroup_id);
> factor = btrfs_bg_type_to_factor(bg->flags);
>
> - down_write(&space_info->groups_sem);
> + percpu_down_write(&space_info->groups_sem);
> list_del_init(&bg->list);
> /* We can assume this as we choose the second empty one. */
> ASSERT(!list_empty(&space_info->block_groups[index]));
> - up_write(&space_info->groups_sem);
> -
> + percpu_up_write(&space_info->groups_sem);
> spin_lock(&space_info->lock);
> space_info->total_bytes -= bg->length;
> space_info->disk_total -= bg->length * factor;
> @@ -2771,7 +2770,7 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
> int ret;
> bool need_finish = false;
>
> - down_read(&space_info->groups_sem);
> + percpu_down_read(&space_info->groups_sem);
> for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) {
> list_for_each_entry(bg, &space_info->block_groups[index],
> list) {
> @@ -2786,14 +2785,14 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
> spin_unlock(&bg->lock);
>
> if (btrfs_zone_activate(bg)) {
> - up_read(&space_info->groups_sem);
> + percpu_up_read(&space_info->groups_sem);
> return 1;
> }
>
> need_finish = true;
> }
> }
> - up_read(&space_info->groups_sem);
> + percpu_up_read(&space_info->groups_sem);
>
> if (!do_finish || !need_finish)
> break;
> --
> 2.39.5
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 6/7] btrfs: Introduce fast path for checking if a block group is done
2026-01-12 16:17 ` [PATCH 6/7] btrfs: Introduce fast path for checking if a block group is done Martin Raiber
@ 2026-01-13 3:48 ` Sun Yangkai
0 siblings, 0 replies; 15+ messages in thread
From: Sun Yangkai @ 2026-01-13 3:48 UTC (permalink / raw)
To: Martin Raiber, linux-btrfs
在 2026/1/13 00:17, Martin Raiber 写道:
> A block group cannot switch away from BTRFS_CACHE_FINISHED
> once it enters that state. Therefore we can introduce
> a fast path that checks for the likely case
> that the block group is already cached, avoiding
> a full memory barrier in the likely fast path.
>
> Signed-off-by: Martin Raiber <martin@urbackup.org>
> ---
> fs/btrfs/block-group.h | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
> index cf877747fd56..73bdf7091d49 100644
> --- a/fs/btrfs/block-group.h
> +++ b/fs/btrfs/block-group.h
> @@ -380,6 +380,9 @@ static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
>
> static inline int btrfs_block_group_done(const struct btrfs_block_group *cache)
> {
> + if (likely(cache->cached == BTRFS_CACHE_FINISHED))
> + return 1;
> +
> smp_mb();
> return cache->cached == BTRFS_CACHE_FINISHED ||
> cache->cached == BTRFS_CACHE_ERROR;
This function semantically returns a boolean, so maybe we can also fix its return type here.
Thanks,
Sun YangKai
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem
2026-01-12 22:58 ` [PATCH " Boris Burkov
@ 2026-01-13 19:15 ` Martin Raiber
2026-01-13 22:27 ` Boris Burkov
0 siblings, 1 reply; 15+ messages in thread
From: Martin Raiber @ 2026-01-13 19:15 UTC (permalink / raw)
To: Boris Burkov; +Cc: linux-btrfs
On 12/01/2026 23:58 Boris Burkov wrote:
> On Mon, Jan 12, 2026 at 04:17:17PM +0000, Martin Raiber wrote:
>> Groups_sem is locked for write mostly only when adding
>> or removing block groups, whereas it is locked for read
>> constantly by multiple CPUs.
>> Change it into a percpu semaphore to significantly
>> increase the performance of find_free_extent.
> This argument makes sense to me, and I don't think the proposal is wrong
> or anything.
>
> However, I am concerned about the low amount of evidence and detail for
> a major change like this.
>
> Can you share your benchmarking results?
To be honest, the benchmarking was that I looked at the throughput of the
one system where it is having performance problems and then progressively
fixed the find_free_extent bottleneck.
I do not know how common such a bottleneck is. E.g. a 10x faster CPU
could cause it to be mostly bottlenecked by IO and not by CPU
synchronization contention.
So I was hoping someone has an already set up, systematic performance test
suite that covers things like performance at nearly full, nearly empty,
ssd_spread, during rebalance, etc.
>
> What, if any, changes in fairness behavior would we expect going from
> rwsem to percpu-rwsem?
>
> Can you characterize the effect on adding / removing block groups? How
> long does it currently take? How long does it take once you make it wait
> for an rcu grace period? That will affect ENOSPC flushing which can be
> blocking some task, so drastically hurting that performance could be bad.
It makes performance of locking for write really bad, so the ENOSPC
flushing might be a
show stopper.
I suspect the whole block group updating could be handled by RCU, and then
groups_sem could be removed. I haven't used RCU before and would have to
learn how to use it in this context, and it would not be a straightforward
change. And as mentioned in the cover letter there might be easier ways to
optimize it on a higher level.
If someone tells me a valid work-around is to increase the chunk size
(from the current max of 10 GiB) for larger file systems, that would
perhaps also be a valid path to fix this (albeit not for existing file
systems).
Thanks,
Martin
>
> I suspect it will be fine, though.
>
> Thanks,
> Boris
>
>> Signed-off-by: Martin Raiber <martin@urbackup.org>
>> ---
>> fs/btrfs/extent-tree.c | 8 ++++----
>> fs/btrfs/ioctl.c | 8 ++++----
>> fs/btrfs/space-info.c | 29 +++++++++++++++++++----------
>> fs/btrfs/space-info.h | 2 +-
>> fs/btrfs/sysfs.c | 9 +++++----
>> fs/btrfs/zoned.c | 11 +++++------
>> 6 files changed, 38 insertions(+), 29 deletions(-)
>>
>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>> index 1dcd69fe97ed..ce2eef069663 100644
>> --- a/fs/btrfs/extent-tree.c
>> +++ b/fs/btrfs/extent-tree.c
>> @@ -4442,7 +4442,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
>> if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
>> block_group->space_info == space_info &&
>> block_group->cached != BTRFS_CACHE_NO) {
>> - down_read(&space_info->groups_sem);
>> + percpu_down_read(&space_info->groups_sem);
>> if (list_empty(&block_group->list) ||
>> block_group->ro) {
>> /*
>> @@ -4452,7 +4452,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
>> * valid
>> */
>> btrfs_put_block_group(block_group);
>> - up_read(&space_info->groups_sem);
>> + percpu_up_read(&space_info->groups_sem);
>> } else {
>> ffe_ctl->index = btrfs_bg_flags_to_raid_index(
>> block_group->flags);
>> @@ -4471,7 +4471,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
>> if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
>> ffe_ctl->index == 0)
>> full_search = true;
>> - down_read(&space_info->groups_sem);
>> + percpu_down_read(&space_info->groups_sem);
>> list_for_each_entry(block_group,
>> &space_info->block_groups[ffe_ctl->index], list) {
>> struct btrfs_block_group *bg_ret;
>> @@ -4609,7 +4609,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
>> release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
>> cond_resched();
>> }
>> - up_read(&space_info->groups_sem);
>> + percpu_up_read(&space_info->groups_sem);
>>
>> ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, space_info,
>> full_search);
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index d9e7dd317670..73ff0efc0381 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -2940,12 +2940,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
>> if (!info)
>> continue;
>>
>> - down_read(&info->groups_sem);
>> + percpu_down_read(&info->groups_sem);
>> for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
>> if (!list_empty(&info->block_groups[c]))
>> slot_count++;
>> }
>> - up_read(&info->groups_sem);
>> + percpu_up_read(&info->groups_sem);
>> }
>>
>> /*
>> @@ -2992,7 +2992,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
>>
>> if (!info)
>> continue;
>> - down_read(&info->groups_sem);
>> + percpu_down_read(&info->groups_sem);
>> for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
>> if (!list_empty(&info->block_groups[c])) {
>> get_block_group_info(&info->block_groups[c],
>> @@ -3005,7 +3005,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
>> if (!slot_count)
>> break;
>> }
>> - up_read(&info->groups_sem);
>> + percpu_up_read(&info->groups_sem);
>> }
>>
>> /*
>> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
>> index 857e4fd2c77e..ddedeccbdade 100644
>> --- a/fs/btrfs/space-info.c
>> +++ b/fs/btrfs/space-info.c
>> @@ -234,13 +234,14 @@ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
>> WRITE_ONCE(space_info->chunk_size, chunk_size);
>> }
>>
>> -static void init_space_info(struct btrfs_fs_info *info,
>> +static int init_space_info(struct btrfs_fs_info *info,
>> struct btrfs_space_info *space_info, u64 flags)
>> {
>> space_info->fs_info = info;
>> for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++)
>> INIT_LIST_HEAD(&space_info->block_groups[i]);
>> - init_rwsem(&space_info->groups_sem);
>> + if (!percpu_init_rwsem(&space_info->groups_sem))
>> + return -ENOMEM;
>> spin_lock_init(&space_info->lock);
>> space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
>> space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
>> @@ -253,6 +254,8 @@ static void init_space_info(struct btrfs_fs_info *info,
>>
>> if (btrfs_is_zoned(info))
>> space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
>> +
>> + return 0;
>> }
>>
>> static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flags,
>> @@ -270,7 +273,10 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
>> if (!sub_group)
>> return -ENOMEM;
>>
>> - init_space_info(fs_info, sub_group, flags);
>> + if (init_space_info(fs_info, sub_group, flags)) {
>> + kfree(sub_group);
>> + return -ENOMEM;
>> + }
>> parent->sub_group[index] = sub_group;
>> sub_group->parent = parent;
>> sub_group->subgroup_id = id;
>> @@ -293,7 +299,10 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
>> if (!space_info)
>> return -ENOMEM;
>>
>> - init_space_info(info, space_info, flags);
>> + if (init_space_info(info, space_info, flags)) {
>> + kfree(space_info);
>> + return -ENOMEM;
>> + }
>>
>> if (btrfs_is_zoned(info)) {
>> if (flags & BTRFS_BLOCK_GROUP_DATA)
>> @@ -384,9 +393,9 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
>> block_group->space_info = space_info;
>>
>> index = btrfs_bg_flags_to_raid_index(block_group->flags);
>> - down_write(&space_info->groups_sem);
>> + percpu_down_write(&space_info->groups_sem);
>> list_add_tail(&block_group->list, &space_info->block_groups[index]);
>> - up_write(&space_info->groups_sem);
>> + percpu_up_write(&space_info->groups_sem);
>> }
>>
>> struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
>> @@ -650,7 +659,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
>> if (!dump_block_groups)
>> return;
>>
>> - down_read(&info->groups_sem);
>> + percpu_down_read(&info->groups_sem);
>> again:
>> list_for_each_entry(cache, &info->block_groups[index], list) {
>> u64 avail;
>> @@ -670,7 +679,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
>> }
>> if (++index < BTRFS_NR_RAID_TYPES)
>> goto again;
>> - up_read(&info->groups_sem);
>> + percpu_up_read(&info->groups_sem);
>>
>> btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
>> }
>> @@ -2095,7 +2104,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
>> thresh_pct = btrfs_calc_reclaim_threshold(space_info);
>> spin_unlock(&space_info->lock);
>>
>> - down_read(&space_info->groups_sem);
>> + percpu_down_read(&space_info->groups_sem);
>> again:
>> list_for_each_entry(bg, &space_info->block_groups[raid], list) {
>> u64 thresh;
>> @@ -2127,7 +2136,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
>> goto again;
>> }
>>
>> - up_read(&space_info->groups_sem);
>> + percpu_up_read(&space_info->groups_sem);
>> }
>>
>> void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
>> diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
>> index 0703f24b23f7..f99624069391 100644
>> --- a/fs/btrfs/space-info.h
>> +++ b/fs/btrfs/space-info.h
>> @@ -175,7 +175,7 @@ struct btrfs_space_info {
>> */
>> u64 tickets_id;
>>
>> - struct rw_semaphore groups_sem;
>> + struct percpu_rw_semaphore groups_sem;
>> /* for block groups in our same type */
>> struct list_head block_groups[BTRFS_NR_RAID_TYPES];
>>
>> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
>> index ebd6d1d6778b..ccec9eb1fa4f 100644
>> --- a/fs/btrfs/sysfs.c
>> +++ b/fs/btrfs/sysfs.c
>> @@ -701,14 +701,14 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
>> int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
>> u64 val = 0;
>>
>> - down_read(&sinfo->groups_sem);
>> + percpu_down_read(&sinfo->groups_sem);
>> list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
>> if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
>> val += block_group->length;
>> else
>> val += block_group->used;
>> }
>> - up_read(&sinfo->groups_sem);
>> + percpu_up_read(&sinfo->groups_sem);
>> return sysfs_emit(buf, "%llu\n", val);
>> }
>>
>> @@ -816,7 +816,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
>> u32 large = 0;
>>
>> for (int i = 0; i < BTRFS_NR_RAID_TYPES; ++i) {
>> - down_read(&sinfo->groups_sem);
>> + percpu_down_read(&sinfo->groups_sem);
>> list_for_each_entry(bg, &sinfo->block_groups[i], list) {
>> if (!btrfs_block_group_should_use_size_class(bg))
>> continue;
>> @@ -835,7 +835,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
>> break;
>> }
>> }
>> - up_read(&sinfo->groups_sem);
>> + percpu_up_read(&sinfo->groups_sem);
>> }
>> return sysfs_emit(buf, "none %u\n"
>> "small %u\n"
>> @@ -1046,6 +1046,7 @@ ATTRIBUTE_GROUPS(space_info);
>> static void space_info_release(struct kobject *kobj)
>> {
>> struct btrfs_space_info *sinfo = to_space_info(kobj);
>> + percpu_free_rwsem(&sinfo->groups_sem);
>> kfree(sinfo);
>> }
>>
>> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
>> index 2e861eef5cd8..da92b0d38a1b 100644
>> --- a/fs/btrfs/zoned.c
>> +++ b/fs/btrfs/zoned.c
>> @@ -2588,12 +2588,11 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
>> "reloc_sinfo->subgroup_id=%d", reloc_sinfo->subgroup_id);
>> factor = btrfs_bg_type_to_factor(bg->flags);
>>
>> - down_write(&space_info->groups_sem);
>> + percpu_down_write(&space_info->groups_sem);
>> list_del_init(&bg->list);
>> /* We can assume this as we choose the second empty one. */
>> ASSERT(!list_empty(&space_info->block_groups[index]));
>> - up_write(&space_info->groups_sem);
>> -
>> + percpu_up_write(&space_info->groups_sem);
>> spin_lock(&space_info->lock);
>> space_info->total_bytes -= bg->length;
>> space_info->disk_total -= bg->length * factor;
>> @@ -2771,7 +2770,7 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
>> int ret;
>> bool need_finish = false;
>>
>> - down_read(&space_info->groups_sem);
>> + percpu_down_read(&space_info->groups_sem);
>> for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) {
>> list_for_each_entry(bg, &space_info->block_groups[index],
>> list) {
>> @@ -2786,14 +2785,14 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
>> spin_unlock(&bg->lock);
>>
>> if (btrfs_zone_activate(bg)) {
>> - up_read(&space_info->groups_sem);
>> + percpu_up_read(&space_info->groups_sem);
>> return 1;
>> }
>>
>> need_finish = true;
>> }
>> }
>> - up_read(&space_info->groups_sem);
>> + percpu_up_read(&space_info->groups_sem);
>>
>> if (!do_finish || !need_finish)
>> break;
>> --
>> 2.39.5
>>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem
2026-01-13 19:15 ` Martin Raiber
@ 2026-01-13 22:27 ` Boris Burkov
0 siblings, 0 replies; 15+ messages in thread
From: Boris Burkov @ 2026-01-13 22:27 UTC (permalink / raw)
To: Martin Raiber; +Cc: linux-btrfs
On Tue, Jan 13, 2026 at 07:15:52PM +0000, Martin Raiber wrote:
> On 12/01/2026 23:58 Boris Burkov wrote:
> > On Mon, Jan 12, 2026 at 04:17:17PM +0000, Martin Raiber wrote:
> > > Groups_sem is locked for write mostly only when adding
> > > or removing block groups, whereas it is locked for read
> > > constantly by multiple CPUs.
> > > Change it into a percpu semaphore to significantly
> > > increase the performance of find_free_extent.
> > This argument makes sense to me, and I don't think the proposal is wrong
> > or anything.
> >
> > However, I am concerned about the low amount of evidence and detail for
> > a major change like this.
> >
> > Can you share your benchmarking results?
>
> To be honest the benchmarking was that I looked at the throughput of the one
> system
> where it is having performance problems and then progressively fixed the
> find_free_extent
> bottleneck.
> I do not know how common a bottleneck there is. E.g. a 10x faster CPU could
> cause it
> to be mostly bottlenecked by IO and not by CPU synchronization
> contention.
Do you have a sense of which ones of your changes are necessary vs
sufficient for your case?
Suppose, for example, we leave the percpu-rwsem as the riskiest and only
land the struct layout and percpu refcounting improvements. Or do you need
all of them to un-bottleneck your case?
>
> So I was hoping someone has an already set up systematic performance test
> suite, that
> has things like performance at nearly full, nearly empty, ssd_spread, during
> rebalance, etc.
>
In the past, I have attempted to stress allocator performance (to check
that my changes for stuff like size class didn't regress it too much)
with fsperf.
https://github.com/josefbacik/fsperf
It's far from perfect, but it will give you some useful outliers to look
at if you really ruin something :)
> >
> > What, if any, changes in fairness behavior would we expect going from
> > rwsem to percpu-rwsem?
> >
> > Can you characterize the effect on adding / removing block groups? How
> > long does it currently take? How long does it take once you make it wait
> > for an rcu grace period? That will affect ENOSPC flushing which can be
> > blocking some task, so drastically hurting that performance could be bad.
> It makes performance of locking for write really bad, so the ENOSPC flushing
> might be a
> show stopper.
OK, I can try to make this question / measurement more concrete.
>
> I suspect the whole block group updating could be handled by RCU, then
> groups_sem could be
> removed. I haven't used RCU before and would have to learn how to use it in
> this context
> and it would not be a straight-forward change. And as mentioned in the
> cover letter there
> might be easier ways to optimize it on a higher level.
We do have a few places where the writer needs to be mutually exclusive
with the callers of find_free_extent (unused bg cleanup and reclaim
loops). However those run every 30s and can totally be slow, so maybe
there is a reasonable different way to accomplish that.
>
> If someone tells me a valid work-around is to increase the chunk size (from
> currently max 10
> GiB) for larger file systems, that would e.g. also be a perhaps valid path
> to fix this (albeit
> not for existing file systems).
I don't have an easy fix like that ready to go, unfortunately.
I think we should also be open to your point that there are natural
algorithmic improvements in find_free_extent beyond the current greedy
algorithm of "8 loops of look at every block group in creation order"
>
> Thanks,
> Martin
>
> >
> > I suspect it will be fine, though.
> >
> > Thanks,
> > Boris
> >
> > > Signed-off-by: Martin Raiber <martin@urbackup.org>
> > > ---
> > > fs/btrfs/extent-tree.c | 8 ++++----
> > > fs/btrfs/ioctl.c | 8 ++++----
> > > fs/btrfs/space-info.c | 29 +++++++++++++++++++----------
> > > fs/btrfs/space-info.h | 2 +-
> > > fs/btrfs/sysfs.c | 9 +++++----
> > > fs/btrfs/zoned.c | 11 +++++------
> > > 6 files changed, 38 insertions(+), 29 deletions(-)
> > >
> > > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> > > index 1dcd69fe97ed..ce2eef069663 100644
> > > --- a/fs/btrfs/extent-tree.c
> > > +++ b/fs/btrfs/extent-tree.c
> > > @@ -4442,7 +4442,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
> > > if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
> > > block_group->space_info == space_info &&
> > > block_group->cached != BTRFS_CACHE_NO) {
> > > - down_read(&space_info->groups_sem);
> > > + percpu_down_read(&space_info->groups_sem);
> > > if (list_empty(&block_group->list) ||
> > > block_group->ro) {
> > > /*
> > > @@ -4452,7 +4452,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
> > > * valid
> > > */
> > > btrfs_put_block_group(block_group);
> > > - up_read(&space_info->groups_sem);
> > > + percpu_up_read(&space_info->groups_sem);
> > > } else {
> > > ffe_ctl->index = btrfs_bg_flags_to_raid_index(
> > > block_group->flags);
> > > @@ -4471,7 +4471,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
> > > if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
> > > ffe_ctl->index == 0)
> > > full_search = true;
> > > - down_read(&space_info->groups_sem);
> > > + percpu_down_read(&space_info->groups_sem);
> > > list_for_each_entry(block_group,
> > > &space_info->block_groups[ffe_ctl->index], list) {
> > > struct btrfs_block_group *bg_ret;
> > > @@ -4609,7 +4609,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
> > > release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
> > > cond_resched();
> > > }
> > > - up_read(&space_info->groups_sem);
> > > + percpu_up_read(&space_info->groups_sem);
> > > ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, space_info,
> > > full_search);
> > > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> > > index d9e7dd317670..73ff0efc0381 100644
> > > --- a/fs/btrfs/ioctl.c
> > > +++ b/fs/btrfs/ioctl.c
> > > @@ -2940,12 +2940,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
> > > if (!info)
> > > continue;
> > > - down_read(&info->groups_sem);
> > > + percpu_down_read(&info->groups_sem);
> > > for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
> > > if (!list_empty(&info->block_groups[c]))
> > > slot_count++;
> > > }
> > > - up_read(&info->groups_sem);
> > > + percpu_up_read(&info->groups_sem);
> > > }
> > > /*
> > > @@ -2992,7 +2992,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
> > > if (!info)
> > > continue;
> > > - down_read(&info->groups_sem);
> > > + percpu_down_read(&info->groups_sem);
> > > for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
> > > if (!list_empty(&info->block_groups[c])) {
> > > get_block_group_info(&info->block_groups[c],
> > > @@ -3005,7 +3005,7 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
> > > if (!slot_count)
> > > break;
> > > }
> > > - up_read(&info->groups_sem);
> > > + percpu_up_read(&info->groups_sem);
> > > }
> > > /*
> > > diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> > > index 857e4fd2c77e..ddedeccbdade 100644
> > > --- a/fs/btrfs/space-info.c
> > > +++ b/fs/btrfs/space-info.c
> > > @@ -234,13 +234,14 @@ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
> > > WRITE_ONCE(space_info->chunk_size, chunk_size);
> > > }
> > > -static void init_space_info(struct btrfs_fs_info *info,
> > > +static int init_space_info(struct btrfs_fs_info *info,
> > > struct btrfs_space_info *space_info, u64 flags)
> > > {
> > > space_info->fs_info = info;
> > > for (int i = 0; i < BTRFS_NR_RAID_TYPES; i++)
> > > INIT_LIST_HEAD(&space_info->block_groups[i]);
> > > - init_rwsem(&space_info->groups_sem);
> > > + if (!percpu_init_rwsem(&space_info->groups_sem))
> > > + return -ENOMEM;
> > > spin_lock_init(&space_info->lock);
> > > space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
> > > space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
> > > @@ -253,6 +254,8 @@ static void init_space_info(struct btrfs_fs_info *info,
> > > if (btrfs_is_zoned(info))
> > > space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
> > > +
> > > + return 0;
> > > }
> > > static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flags,
> > > @@ -270,7 +273,10 @@ static int create_space_info_sub_group(struct btrfs_space_info *parent, u64 flag
> > > if (!sub_group)
> > > return -ENOMEM;
> > > - init_space_info(fs_info, sub_group, flags);
> > > + if (init_space_info(fs_info, sub_group, flags)) {
> > > + kfree(sub_group);
> > > + return -ENOMEM;
> > > + }
> > > parent->sub_group[index] = sub_group;
> > > sub_group->parent = parent;
> > > sub_group->subgroup_id = id;
> > > @@ -293,7 +299,10 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
> > > if (!space_info)
> > > return -ENOMEM;
> > > - init_space_info(info, space_info, flags);
> > > + if (init_space_info(info, space_info, flags)) {
> > > + kfree(space_info);
> > > + return -ENOMEM;
> > > + }
> > > if (btrfs_is_zoned(info)) {
> > > if (flags & BTRFS_BLOCK_GROUP_DATA)
> > > @@ -384,9 +393,9 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
> > > block_group->space_info = space_info;
> > > index = btrfs_bg_flags_to_raid_index(block_group->flags);
> > > - down_write(&space_info->groups_sem);
> > > + percpu_down_write(&space_info->groups_sem);
> > > list_add_tail(&block_group->list, &space_info->block_groups[index]);
> > > - up_write(&space_info->groups_sem);
> > > + percpu_up_write(&space_info->groups_sem);
> > > }
> > > struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
> > > @@ -650,7 +659,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
> > > if (!dump_block_groups)
> > > return;
> > > - down_read(&info->groups_sem);
> > > + percpu_down_read(&info->groups_sem);
> > > again:
> > > list_for_each_entry(cache, &info->block_groups[index], list) {
> > > u64 avail;
> > > @@ -670,7 +679,7 @@ void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes,
> > > }
> > > if (++index < BTRFS_NR_RAID_TYPES)
> > > goto again;
> > > - up_read(&info->groups_sem);
> > > + percpu_up_read(&info->groups_sem);
> > > btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
> > > }
> > > @@ -2095,7 +2104,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
> > > thresh_pct = btrfs_calc_reclaim_threshold(space_info);
> > > spin_unlock(&space_info->lock);
> > > - down_read(&space_info->groups_sem);
> > > + percpu_down_read(&space_info->groups_sem);
> > > again:
> > > list_for_each_entry(bg, &space_info->block_groups[raid], list) {
> > > u64 thresh;
> > > @@ -2127,7 +2136,7 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
> > > goto again;
> > > }
> > > - up_read(&space_info->groups_sem);
> > > + percpu_up_read(&space_info->groups_sem);
> > > }
> > > void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
> > > diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
> > > index 0703f24b23f7..f99624069391 100644
> > > --- a/fs/btrfs/space-info.h
> > > +++ b/fs/btrfs/space-info.h
> > > @@ -175,7 +175,7 @@ struct btrfs_space_info {
> > > */
> > > u64 tickets_id;
> > > - struct rw_semaphore groups_sem;
> > > + struct percpu_rw_semaphore groups_sem;
> > > /* for block groups in our same type */
> > > struct list_head block_groups[BTRFS_NR_RAID_TYPES];
> > > diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> > > index ebd6d1d6778b..ccec9eb1fa4f 100644
> > > --- a/fs/btrfs/sysfs.c
> > > +++ b/fs/btrfs/sysfs.c
> > > @@ -701,14 +701,14 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
> > > int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
> > > u64 val = 0;
> > > - down_read(&sinfo->groups_sem);
> > > + percpu_down_read(&sinfo->groups_sem);
> > > list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
> > > if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
> > > val += block_group->length;
> > > else
> > > val += block_group->used;
> > > }
> > > - up_read(&sinfo->groups_sem);
> > > + percpu_up_read(&sinfo->groups_sem);
> > > return sysfs_emit(buf, "%llu\n", val);
> > > }
> > > @@ -816,7 +816,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
> > > u32 large = 0;
> > > for (int i = 0; i < BTRFS_NR_RAID_TYPES; ++i) {
> > > - down_read(&sinfo->groups_sem);
> > > + percpu_down_read(&sinfo->groups_sem);
> > > list_for_each_entry(bg, &sinfo->block_groups[i], list) {
> > > if (!btrfs_block_group_should_use_size_class(bg))
> > > continue;
> > > @@ -835,7 +835,7 @@ static ssize_t btrfs_size_classes_show(struct kobject *kobj,
> > > break;
> > > }
> > > }
> > > - up_read(&sinfo->groups_sem);
> > > + percpu_up_read(&sinfo->groups_sem);
> > > }
> > > return sysfs_emit(buf, "none %u\n"
> > > "small %u\n"
> > > @@ -1046,6 +1046,7 @@ ATTRIBUTE_GROUPS(space_info);
> > > static void space_info_release(struct kobject *kobj)
> > > {
> > > struct btrfs_space_info *sinfo = to_space_info(kobj);
> > > + percpu_free_rwsem(&sinfo->groups_sem);
> > > kfree(sinfo);
> > > }
> > > diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> > > index 2e861eef5cd8..da92b0d38a1b 100644
> > > --- a/fs/btrfs/zoned.c
> > > +++ b/fs/btrfs/zoned.c
> > > @@ -2588,12 +2588,11 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
> > > "reloc_sinfo->subgroup_id=%d", reloc_sinfo->subgroup_id);
> > > factor = btrfs_bg_type_to_factor(bg->flags);
> > > - down_write(&space_info->groups_sem);
> > > + percpu_down_write(&space_info->groups_sem);
> > > list_del_init(&bg->list);
> > > /* We can assume this as we choose the second empty one. */
> > > ASSERT(!list_empty(&space_info->block_groups[index]));
> > > - up_write(&space_info->groups_sem);
> > > -
> > > + percpu_up_write(&space_info->groups_sem);
> > > spin_lock(&space_info->lock);
> > > space_info->total_bytes -= bg->length;
> > > space_info->disk_total -= bg->length * factor;
> > > @@ -2771,7 +2770,7 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
> > > int ret;
> > > bool need_finish = false;
> > > - down_read(&space_info->groups_sem);
> > > + percpu_down_read(&space_info->groups_sem);
> > > for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) {
> > > list_for_each_entry(bg, &space_info->block_groups[index],
> > > list) {
> > > @@ -2786,14 +2785,14 @@ int btrfs_zoned_activate_one_bg(struct btrfs_space_info *space_info, bool do_fin
> > > spin_unlock(&bg->lock);
> > > if (btrfs_zone_activate(bg)) {
> > > - up_read(&space_info->groups_sem);
> > > + percpu_up_read(&space_info->groups_sem);
> > > return 1;
> > > }
> > > need_finish = true;
> > > }
> > > }
> > > - up_read(&space_info->groups_sem);
> > > + percpu_up_read(&space_info->groups_sem);
> > > if (!do_finish || !need_finish)
> > > break;
> > > --
> > > 2.39.5
> > >
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/7] btrfs: Use percpu refcounting for block groups
2026-01-12 16:17 ` [PATCH 1/7] btrfs: Use percpu refcounting for block groups Martin Raiber
2026-01-12 22:32 ` Boris Burkov
@ 2026-01-14 6:06 ` kernel test robot
1 sibling, 0 replies; 15+ messages in thread
From: kernel test robot @ 2026-01-14 6:06 UTC (permalink / raw)
To: Martin Raiber, linux-btrfs; +Cc: oe-kbuild-all, Martin Raiber
Hi Martin,
kernel test robot noticed the following build errors:
[auto build test ERROR on kdave/for-next]
[also build test ERROR on linus/master v6.19-rc5 next-20260113]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Martin-Raiber/btrfs-Use-percpu-semaphore-for-space-info-groups_sem/20260113-070107
base: https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next
patch link: https://lore.kernel.org/r/0102019bb2ff56b7-9302e783-c17d-452d-b6a7-11f773776ae7-000000%40eu-west-1.amazonses.com
patch subject: [PATCH 1/7] btrfs: Use percpu refcounting for block groups
config: x86_64-rhel-9.4-ltp (https://download.01.org/0day-ci/archive/20260114/202601141316.7JnRkX9k-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260114/202601141316.7JnRkX9k-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601141316.7JnRkX9k-lkp@intel.com/
All errors (new ones prefixed by >>):
fs/btrfs/block-group.c: In function 'btrfs_wait_block_group_reservations':
>> fs/btrfs/block-group.c:416:27: error: passing argument 1 of 'percpu_down_write' from incompatible pointer type [-Wincompatible-pointer-types]
416 | percpu_down_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
In file included from include/linux/fs/super_types.h:13,
from include/linux/fs/super.h:5,
from include/linux/fs.h:5,
from include/linux/huge_mm.h:7,
from include/linux/mm.h:1268,
from fs/btrfs/misc.h:10,
from fs/btrfs/block-group.c:5:
include/linux/percpu-rwsem.h:138:31: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
138 | extern void percpu_down_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> fs/btrfs/block-group.c:417:25: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
417 | percpu_up_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c: In function 'clear_incompat_bg_bits':
>> fs/btrfs/block-group.c:1022:42: error: passing argument 1 of 'percpu_down_read' from incompatible pointer type [-Wincompatible-pointer-types]
1022 | percpu_down_read(&sinfo->groups_sem);
| ^~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:75:65: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
75 | static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~
>> fs/btrfs/block-group.c:1031:40: error: passing argument 1 of 'percpu_up_read' from incompatible pointer type [-Wincompatible-pointer-types]
1031 | percpu_up_read(&sinfo->groups_sem);
| ^~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:110:63: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
110 | static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~
fs/btrfs/block-group.c: In function 'btrfs_remove_block_group':
fs/btrfs/block-group.c:1173:27: error: passing argument 1 of 'percpu_down_write' from incompatible pointer type [-Wincompatible-pointer-types]
1173 | percpu_down_write(&block_group->space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:138:31: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
138 | extern void percpu_down_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1184:25: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1184 | percpu_up_write(&block_group->space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c: In function 'btrfs_delete_unused_bgs':
fs/btrfs/block-group.c:1554:35: error: passing argument 1 of 'percpu_down_write' from incompatible pointer type [-Wincompatible-pointer-types]
1554 | percpu_down_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:138:31: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
138 | extern void percpu_down_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1564:41: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1564 | percpu_up_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1591:41: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1591 | percpu_up_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1628:41: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1628 | percpu_up_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1637:33: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1637 | percpu_up_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c: In function 'btrfs_reclaim_bgs_work':
fs/btrfs/block-group.c:1892:35: error: passing argument 1 of 'percpu_down_write' from incompatible pointer type [-Wincompatible-pointer-types]
1892 | percpu_down_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:138:31: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
138 | extern void percpu_down_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1905:41: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1905 | percpu_up_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1924:41: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1924 | percpu_up_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1941:41: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1941 | percpu_up_write(&space_info->groups_sem);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| struct rw_semaphore *
include/linux/percpu-rwsem.h:139:29: note: expected 'struct percpu_rw_semaphore *' but argument is of type 'struct rw_semaphore *'
139 | extern void percpu_up_write(struct percpu_rw_semaphore *);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/btrfs/block-group.c:1957:41: error: passing argument 1 of 'percpu_up_write' from incompatible pointer type [-Wincompatible-pointer-types]
1957 | percpu_up_write(&space_info->groups_sem);
vim +/percpu_down_write +416 fs/btrfs/block-group.c
396
397 void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg)
398 {
399 struct btrfs_space_info *space_info = bg->space_info;
400
401 ASSERT(bg->ro);
402
403 if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
404 return;
405
406 /*
407 * Our block group is read only but before we set it to read only,
408 * some task might have had allocated an extent from it already, but it
409 * has not yet created a respective ordered extent (and added it to a
410 * root's list of ordered extents).
411 * Therefore wait for any task currently allocating extents, since the
412 * block group's reservations counter is incremented while a read lock
413 * on the groups' semaphore is held and decremented after releasing
414 * the read access on that semaphore and creating the ordered extent.
415 */
> 416 percpu_down_write(&space_info->groups_sem);
> 417 percpu_up_write(&space_info->groups_sem);
418
419 wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
420 }
421
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v2 2/7] btrfs: Use percpu semaphore for space info groups_sem
2026-01-12 18:58 ` [PATCH v2 " Martin Raiber
@ 2026-01-14 11:06 ` Johannes Thumshirn
0 siblings, 0 replies; 15+ messages in thread
From: Johannes Thumshirn @ 2026-01-14 11:06 UTC (permalink / raw)
To: Martin Raiber, linux-btrfs@vger.kernel.org
On 1/12/26 7:58 PM, Martin Raiber wrote:
> Change it into a percpu semaphore to significantly
> increase the performance of find_free_extent.
Do you have any numbers for this "significant performance increase"?
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2026-01-14 11:06 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20260112161549.2786827-1-martin@urbackup.org>
2026-01-12 16:17 ` [PATCH 1/7] btrfs: Use percpu refcounting for block groups Martin Raiber
2026-01-12 22:32 ` Boris Burkov
2026-01-14 6:06 ` kernel test robot
2026-01-12 16:17 ` [PATCH 2/7] btrfs: Use percpu semaphore for space info groups_sem Martin Raiber
2026-01-12 18:58 ` [PATCH v2 " Martin Raiber
2026-01-14 11:06 ` Johannes Thumshirn
2026-01-12 22:58 ` [PATCH " Boris Burkov
2026-01-13 19:15 ` Martin Raiber
2026-01-13 22:27 ` Boris Burkov
2026-01-12 16:17 ` [PATCH 3/7] btrfs: Don't lock data_rwsem if space cache v1 is not used Martin Raiber
2026-01-12 16:17 ` [PATCH 6/7] btrfs: Introduce fast path for checking if a block group is done Martin Raiber
2026-01-13 3:48 ` Sun Yangkai
2026-01-12 16:17 ` [PATCH 4/7] btrfs: Use percpu sem for block_group_cache_lock Martin Raiber
2026-01-12 16:17 ` [PATCH 5/7] btrfs: Skip locking percpu semaphores on mount Martin Raiber
2026-01-12 16:17 ` [PATCH 7/7] btrfs: Move block group members frequently accessed together closer Martin Raiber
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox