* [PATCH 1/3] btrfs: make the bg_reclaim_threshold per-space info
2022-03-10 17:58 [PATCH 0/3] btrfs: rework background block group relocation Josef Bacik
@ 2022-03-10 17:58 ` Josef Bacik
2022-03-11 7:46 ` Johannes Thumshirn
2022-03-10 17:58 ` [PATCH 2/3] btrfs: allow block group background reclaim for !zoned fs'es Josef Bacik
2022-03-10 17:58 ` [PATCH 3/3] btrfs: change the bg_reclaim_threshold valid region from 0 to 100 Josef Bacik
2 siblings, 1 reply; 7+ messages in thread
From: Josef Bacik @ 2022-03-10 17:58 UTC (permalink / raw)
To: linux-btrfs, kernel-team
For !zoned file systems it's useful to have the auto reclaim feature.
However, there are different use cases for !zoned; for example, we may not
want to reclaim metadata chunks ever, only data chunks. Move this sysfs
tunable to per-space_info. This won't affect current users because this
tunable only ever did anything for zoned, and that is currently hidden
behind CONFIG_BTRFS_DEBUG.
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
fs/btrfs/ctree.h | 1 -
fs/btrfs/disk-io.c | 1 -
fs/btrfs/free-space-cache.c | 4 +--
fs/btrfs/space-info.c | 9 +++++
fs/btrfs/space-info.h | 6 ++++
fs/btrfs/sysfs.c | 71 +++++++++++++++++++------------------
fs/btrfs/zoned.h | 6 ----
7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4db17bd05a21..1953ea40755d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1015,7 +1015,6 @@ struct btrfs_fs_info {
/* Reclaim partially filled block groups in the background */
struct work_struct reclaim_bgs_work;
struct list_head reclaim_bgs;
- int bg_reclaim_threshold;
spinlock_t unused_bgs_lock;
struct list_head unused_bgs;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 09693ab4fde0..c135b79bf3e3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3246,7 +3246,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->swapfile_pins_lock);
fs_info->swapfile_pins = RB_ROOT;
- fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;
INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work);
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 01a408db5683..01ac1161aec5 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2630,11 +2630,11 @@ int __btrfs_add_free_space(struct btrfs_block_group *block_group,
static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
u64 bytenr, u64 size, bool used)
{
- struct btrfs_fs_info *fs_info = block_group->fs_info;
+ struct btrfs_space_info *sinfo = block_group->space_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
u64 offset = bytenr - block_group->start;
u64 to_free, to_unusable;
- const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
+ const int bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
bool initial = (size == block_group->length);
u64 reclaimable_unusable;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index b87931a458eb..60d0a58c4644 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -181,6 +181,12 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
found->full = 0;
}
+/*
+ * Block groups with more than this value (percents) of unusable space will be
+ * scheduled for background reclaim.
+ */
+#define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH 75
+
static int create_space_info(struct btrfs_fs_info *info, u64 flags)
{
@@ -203,6 +209,9 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
INIT_LIST_HEAD(&space_info->priority_tickets);
space_info->clamp = 1;
+ if (btrfs_is_zoned(info))
+ space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
+
ret = btrfs_sysfs_add_space_info_type(info, space_info);
if (ret)
return ret;
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index d841fed73492..0c45f539e3cf 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -24,6 +24,12 @@ struct btrfs_space_info {
the space info if we had an ENOSPC in the
allocator. */
+ /*
+ * Once a block group drops below this threshold we'll schedule it for
+ * reclaim.
+ */
+ int bg_reclaim_threshold;
+
int clamp; /* Used to scale our threshold for preemptive
flushing. The value is >> clamp, so turns
out to be a 2^clamp divisor. */
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 17389a42a3ab..d11ff1c55394 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -722,6 +722,41 @@ SPACE_INFO_ATTR(bytes_zone_unusable);
SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total);
+static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
+ struct kobj_attribute *a,
+ char *buf)
+{
+ struct btrfs_space_info *space_info = to_space_info(kobj);
+ ssize_t ret;
+
+ ret = sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
+
+ return ret;
+}
+
+static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
+ struct kobj_attribute *a,
+ const char *buf, size_t len)
+{
+ struct btrfs_space_info *space_info = to_space_info(kobj);
+ int thresh;
+ int ret;
+
+ ret = kstrtoint(buf, 10, &thresh);
+ if (ret)
+ return ret;
+
+ if (thresh != 0 && (thresh <= 50 || thresh > 100))
+ return -EINVAL;
+
+ WRITE_ONCE(space_info->bg_reclaim_threshold, thresh);
+
+ return len;
+}
+
+BTRFS_ATTR_RW(space_info, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
+ btrfs_bg_reclaim_threshold_store);
+
/*
* Allocation information about block group types.
*
@@ -738,6 +773,7 @@ static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
BTRFS_ATTR_PTR(space_info, disk_used),
BTRFS_ATTR_PTR(space_info, disk_total),
+ BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold),
NULL,
};
ATTRIBUTE_GROUPS(space_info);
@@ -1021,40 +1057,6 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
}
BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
-static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
- struct kobj_attribute *a,
- char *buf)
-{
- struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- ssize_t ret;
-
- ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
-
- return ret;
-}
-
-static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
- struct kobj_attribute *a,
- const char *buf, size_t len)
-{
- struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- int thresh;
- int ret;
-
- ret = kstrtoint(buf, 10, &thresh);
- if (ret)
- return ret;
-
- if (thresh != 0 && (thresh <= 50 || thresh > 100))
- return -EINVAL;
-
- WRITE_ONCE(fs_info->bg_reclaim_threshold, thresh);
-
- return len;
-}
-BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
- btrfs_bg_reclaim_threshold_store);
-
/*
* Per-filesystem information and stats.
*
@@ -1071,7 +1073,6 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, exclusive_operation),
BTRFS_ATTR_PTR(, generation),
BTRFS_ATTR_PTR(, read_policy),
- BTRFS_ATTR_PTR(, bg_reclaim_threshold),
NULL,
};
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index cbf016a7bb5d..9075c87a397e 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -10,12 +10,6 @@
#include "block-group.h"
#include "btrfs_inode.h"
-/*
- * Block groups with more than this value (percents) of unusable space will be
- * scheduled for background reclaim.
- */
-#define BTRFS_DEFAULT_RECLAIM_THRESH 75
-
struct btrfs_zoned_device_info {
/*
* Number of zones, zone size and types of zones if bdev is a
--
2.26.3
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] btrfs: allow block group background reclaim for !zoned fs'es
2022-03-10 17:58 [PATCH 0/3] btrfs: rework background block group relocation Josef Bacik
2022-03-10 17:58 ` [PATCH 1/3] btrfs: make the bg_reclaim_threshold per-space info Josef Bacik
@ 2022-03-10 17:58 ` Josef Bacik
2022-03-11 7:48 ` Johannes Thumshirn
2022-03-10 17:58 ` [PATCH 3/3] btrfs: change the bg_reclaim_threshold valid region from 0 to 100 Josef Bacik
2 siblings, 1 reply; 7+ messages in thread
From: Josef Bacik @ 2022-03-10 17:58 UTC (permalink / raw)
To: linux-btrfs, kernel-team
We have found this feature invaluable at Facebook due to how our
workload interacts with the allocator. We have been using this in
production for months with only a single problem that has already been
fixed. This will allow us to set a threshold for block groups to be
automatically relocated even if we don't have zoned devices.
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
fs/btrfs/block-group.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c22d287e020b..ca43daba292a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3230,6 +3230,31 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
return ret;
}
+static inline bool should_reclaim_block_group(struct btrfs_block_group *block_group,
+ u64 bytes_freed)
+{
+ const struct btrfs_space_info *space_info = block_group->space_info;
+ const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
+ const u64 new_val = block_group->used;
+ const u64 old_val = new_val + bytes_freed;
+ u64 thresh;
+
+ if (reclaim_thresh == 0)
+ return false;
+
+ thresh = div_factor_fine(block_group->length, reclaim_thresh);
+
+ /*
+ * If we were below the threshold before don't reclaim, we are likely a
+ * brand new block group and we don't want to relocate new block groups.
+ */
+ if (old_val < thresh)
+ return false;
+ if (new_val >= thresh)
+ return false;
+ return true;
+}
+
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, bool alloc)
{
@@ -3252,6 +3277,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&info->delalloc_root_lock);
while (total) {
+ bool reclaim;
+
cache = btrfs_lookup_block_group(info, bytenr);
if (!cache) {
ret = -ENOENT;
@@ -3297,6 +3324,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
cache->space_info, num_bytes);
cache->space_info->bytes_used -= num_bytes;
cache->space_info->disk_used -= num_bytes * factor;
+
+ reclaim = should_reclaim_block_group(cache, num_bytes);
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -3323,6 +3352,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
if (!alloc && old_val == 0) {
if (!btrfs_test_opt(info, DISCARD_ASYNC))
btrfs_mark_bg_unused(cache);
+ } else if (!alloc && reclaim) {
+ btrfs_mark_bg_to_reclaim(cache);
}
btrfs_put_block_group(cache);
--
2.26.3
^ permalink raw reply related [flat|nested] 7+ messages in thread