* [PATCH 0/3] Fast device removal
@ 2025-05-04 19:51 Kent Overstreet
2025-05-04 19:51 ` [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID Kent Overstreet
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-05-04 19:51 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Previously, device removal had to do a full metadata scan to check for
pointers to the device being removed.
Instead, we can now walk backpointers - drastically faster on large
filesystems, particularly after we've already done an evacuate.
Since we don't fully trust backpointers this requires an incompatible
upgrade - we need a sentinel value on member devices so that device
indexes aren't reused until after a fsck.
Kent Overstreet (3):
bcachefs: BCH_SB_MEMBER_DELETED_UUID
bcachefs: bch2_dev_data_drop_by_backpointers()
bcachefs: bcachefs_metadata_version_fast_device_removal
fs/bcachefs/bcachefs_format.h | 3 +-
fs/bcachefs/btree_gc.c | 4 ++
fs/bcachefs/migrate.c | 107 +++++++++++++++++++++++++++-----
fs/bcachefs/migrate.h | 3 +-
fs/bcachefs/sb-members.c | 29 ++++++++-
fs/bcachefs/sb-members.h | 4 +-
fs/bcachefs/sb-members_format.h | 4 ++
fs/bcachefs/super.c | 25 +++++++-
8 files changed, 158 insertions(+), 21 deletions(-)
--
2.49.0
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID
2025-05-04 19:51 [PATCH 0/3] Fast device removal Kent Overstreet
@ 2025-05-04 19:51 ` Kent Overstreet
2025-05-04 19:51 ` [PATCH 2/3] bcachefs: bch2_dev_data_drop_by_backpointers() Kent Overstreet
2025-05-04 19:51 ` [PATCH 3/3] bcachefs: bcachefs_metadata_version_fast_device_removal Kent Overstreet
2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-05-04 19:51 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Add a sentinel value for devices that have been removed, but whose
index we don't want to reuse until a fsck has completed.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_gc.c | 4 ++++
fs/bcachefs/sb-members.c | 29 ++++++++++++++++++++++++++++-
fs/bcachefs/sb-members.h | 4 +++-
fs/bcachefs/sb-members_format.h | 4 ++++
4 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 92ae31737a24..dd08ec080313 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1079,6 +1079,10 @@ int bch2_check_allocations(struct bch_fs *c)
* allocator thread - issue wakeup in case they blocked on gc_lock:
*/
closure_wake_up(&c->freelist_wait);
+
+ if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
+ bch2_sb_members_clean_deleted(c);
+
bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 9c383d9a5f4d..e5c68c2f1655 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -525,6 +525,7 @@ int bch2_sb_member_alloc(struct bch_fs *c)
unsigned u64s;
int best = -1;
u64 best_last_mount = 0;
+ unsigned nr_deleted = 0;
if (dev_idx < BCH_SB_MEMBERS_MAX)
goto have_slot;
@@ -535,7 +536,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
continue;
struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
- if (bch2_member_alive(&m))
+
+ nr_deleted += uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID);
+
+ if (!bch2_is_zero(&m.uuid, sizeof(m.uuid)))
continue;
u64 last_mount = le64_to_cpu(m.last_mount);
@@ -549,6 +553,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
goto have_slot;
}
+ if (nr_deleted)
+ bch_err(c, "unable to allocate new member, but have %u deleted: run fsck",
+ nr_deleted);
+
return -BCH_ERR_ENOSPC_sb_members;
have_slot:
nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
@@ -564,3 +572,22 @@ int bch2_sb_member_alloc(struct bch_fs *c)
c->disk_sb.sb->nr_devices = nr_devices;
return dev_idx;
}
+
+void bch2_sb_members_clean_deleted(struct bch_fs *c)
+{
+ mutex_lock(&c->sb_lock);
+ bool write_sb = false;
+
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, i);
+
+ if (uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID)) {
+ memset(&m->uuid, 0, sizeof(m->uuid));
+ write_sb = true;
+ }
+ }
+
+ if (write_sb)
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index c9cb8f7657b0..6bd9b86aee5b 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -320,7 +320,8 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
static inline bool bch2_member_alive(struct bch_member *m)
{
- return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
+ return !bch2_is_zero(&m->uuid, sizeof(m->uuid)) &&
+ !uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID);
}
static inline bool bch2_member_exists(struct bch_sb *sb, unsigned dev)
@@ -381,5 +382,6 @@ bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
int bch2_sb_member_alloc(struct bch_fs *);
+void bch2_sb_members_clean_deleted(struct bch_fs *);
#endif /* _BCACHEFS_SB_MEMBERS_H */
diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h
index 472218a59102..fb72ad730518 100644
--- a/fs/bcachefs/sb-members_format.h
+++ b/fs/bcachefs/sb-members_format.h
@@ -13,6 +13,10 @@
*/
#define BCH_SB_MEMBER_INVALID 255
+#define BCH_SB_MEMBER_DELETED_UUID \
+ UUID_INIT(0xffffffff, 0xffff, 0xffff, \
+ 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
+
#define BCH_MIN_NR_NBUCKETS (1 << 6)
#define BCH_IOPS_MEASUREMENTS() \
--
2.49.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] bcachefs: bch2_dev_data_drop_by_backpointers()
2025-05-04 19:51 [PATCH 0/3] Fast device removal Kent Overstreet
2025-05-04 19:51 ` [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID Kent Overstreet
@ 2025-05-04 19:51 ` Kent Overstreet
2025-05-04 19:51 ` [PATCH 3/3] bcachefs: bcachefs_metadata_version_fast_device_removal Kent Overstreet
2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-05-04 19:51 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Currently, device removal has to scan all metadata for pointers to the
device being removed.
Add a new method, with the same interface as bch2_dev_data_drop(), that
scans by backpointers instead - this will drastically speed up device
removal.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/migrate.c | 107 ++++++++++++++++++++++++++++++++++++------
fs/bcachefs/migrate.h | 3 +-
2 files changed, 95 insertions(+), 15 deletions(-)
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 90dcf80bd64a..f431586a971f 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -4,9 +4,11 @@
*/
#include "bcachefs.h"
+#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
#include "buckets.h"
#include "errcode.h"
#include "extents.h"
@@ -20,7 +22,7 @@
#include "super-io.h"
static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
- unsigned dev_idx, int flags, bool metadata)
+ unsigned dev_idx, unsigned flags, bool metadata)
{
unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
@@ -37,11 +39,28 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
return 0;
}
+static int drop_btree_ptrs(struct btree_trans *trans, struct btree_iter *iter,
+ struct btree *b, unsigned dev_idx, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_buf k;
+
+ bch2_bkey_buf_init(&k);
+ bch2_bkey_buf_copy(&k, c, &b->key);
+
+ int ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), dev_idx, flags, true) ?:
+ bch2_btree_node_update_key(trans, iter, b, k.k, 0, false);
+
+ bch_err_fn(c, ret);
+ bch2_bkey_buf_exit(&k, c);
+ return ret;
+}
+
static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
unsigned dev_idx,
- int flags)
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_i *n;
@@ -77,9 +96,27 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
return 0;
}
+static int bch2_dev_btree_drop_key(struct btree_trans *trans,
+ struct bkey_s_c_backpointer bp,
+ unsigned dev_idx,
+ struct bkey_buf *last_flushed,
+ unsigned flags)
+{
+ struct btree_iter iter;
+ struct btree *b = bch2_backpointer_get_node(trans, bp, &iter, last_flushed);
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ return ret == -BCH_ERR_backpointer_to_overwritten_btree_node ? 0 : ret;
+
+ ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
static int bch2_dev_usrdata_drop(struct bch_fs *c,
struct progress_indicator_state *progress,
- unsigned dev_idx, int flags)
+ unsigned dev_idx, unsigned flags)
{
struct btree_trans *trans = bch2_trans_get(c);
enum btree_id id;
@@ -106,7 +143,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
static int bch2_dev_metadata_drop(struct bch_fs *c,
struct progress_indicator_state *progress,
- unsigned dev_idx, int flags)
+ unsigned dev_idx, unsigned flags)
{
struct btree_trans *trans;
struct btree_iter iter;
@@ -137,20 +174,12 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
goto next;
- bch2_bkey_buf_copy(&k, c, &b->key);
-
- ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
- dev_idx, flags, true);
- if (ret)
- break;
-
- ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
+ ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
}
- bch_err_msg(c, ret, "updating btree node key");
if (ret)
break;
next:
@@ -176,7 +205,57 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
return ret;
}
-int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx,
+ struct bkey_s_c_backpointer bp, struct bkey_buf *last_flushed,
+ unsigned flags)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed);
+ int ret = bkey_err(k);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ return 0;
+ if (ret)
+ return ret;
+
+ if (!bch2_bkey_has_device_c(k, dev_idx))
+ goto out;
+
+ ret = bkey_is_btree_ptr(k.k)
+ ? bch2_dev_btree_drop_key(trans, bp, dev_idx, last_flushed, flags)
+ : bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+
+ struct bkey_buf last_flushed;
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
+ POS(dev_idx, 0),
+ POS(dev_idx, U64_MAX), 0, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ if (k.k->type != KEY_TYPE_backpointer)
+ continue;
+
+ data_drop_bp(trans, dev_idx, bkey_s_c_to_backpointer(k),
+ &last_flushed, flags);
+
+ }));
+
+ bch2_bkey_buf_exit(&last_flushed, trans->c);
+ bch2_trans_put(trans);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, unsigned flags)
{
struct progress_indicator_state progress;
bch2_progress_init(&progress, c,
diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h
index 027efaa0d575..30018140711b 100644
--- a/fs/bcachefs/migrate.h
+++ b/fs/bcachefs/migrate.h
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_MIGRATE_H
#define _BCACHEFS_MIGRATE_H
-int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *, unsigned, unsigned);
+int bch2_dev_data_drop(struct bch_fs *, unsigned, unsigned);
#endif /* _BCACHEFS_MIGRATE_H */
--
2.49.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] bcachefs: bcachefs_metadata_version_fast_device_removal
2025-05-04 19:51 [PATCH 0/3] Fast device removal Kent Overstreet
2025-05-04 19:51 ` [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID Kent Overstreet
2025-05-04 19:51 ` [PATCH 2/3] bcachefs: bch2_dev_data_drop_by_backpointers() Kent Overstreet
@ 2025-05-04 19:51 ` Kent Overstreet
2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-05-04 19:51 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Fast device removal, that uses backpointers to find pointers to the
device being removed instead of a full metadata scan.
This requires BCH_SB_MEMBER_DELETED_UUID, which is an incompatible
change - hence the version number bump. We don't fully trust
backpointers, so we don't want to reuse device indexes until after a
fsck has verified that there aren't any pointers to removed devices.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/bcachefs_format.h | 3 ++-
fs/bcachefs/super.c | 25 ++++++++++++++++++++++---
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 0beff6af7ecf..baaf9786238b 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -696,7 +696,8 @@ struct bch_sb_field_ext {
x(stripe_lru, BCH_VERSION(1, 23)) \
x(casefolding, BCH_VERSION(1, 24)) \
x(extent_flags, BCH_VERSION(1, 25)) \
- x(snapshot_deletion_v2, BCH_VERSION(1, 26))
+ x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \
+ x(fast_device_removal, BCH_VERSION(1, 27))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 35b07410a8c6..18d8823cdb79 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1719,6 +1719,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
{
struct bch_member *m;
unsigned dev_idx = ca->dev_idx, data;
+ bool fast_device_removal = !bch2_request_incompat_feature(c,
+ bcachefs_metadata_version_fast_device_removal);
int ret;
down_write(&c->state_lock);
@@ -1737,11 +1739,24 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
__bch2_dev_read_only(c, ca);
- ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
- bch_err_msg(ca, ret, "bch2_dev_data_drop()");
+ ret = fast_device_removal
+ ? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags)
+ : bch2_dev_data_drop(c, ca->dev_idx, flags);
if (ret)
goto err;
+ /* Check if device still has data */
+ struct bch_dev_usage usage = bch2_dev_usage_read(ca);
+ for (unsigned i = 0; i < BCH_DATA_NR; i++)
+ if (!data_type_is_empty(i) &&
+ !data_type_is_hidden(i) &&
+ usage.buckets[i]) {
+ bch_err(ca, "Remove failed: still has data (%s, %llu buckets)",
+ __bch2_data_types[i], usage.buckets[i]);
+ ret = -EBUSY;
+ goto err;
+ }
+
ret = bch2_dev_remove_alloc(c, ca);
bch_err_msg(ca, ret, "bch2_dev_remove_alloc()");
if (ret)
@@ -1805,7 +1820,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
*/
mutex_lock(&c->sb_lock);
m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
- memset(&m->uuid, 0, sizeof(m->uuid));
+
+ if (fast_device_removal)
+ m->uuid = BCH_SB_MEMBER_DELETED_UUID;
+ else
+ memset(&m->uuid, 0, sizeof(m->uuid));
bch2_write_super(c);
--
2.49.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-05-04 19:52 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --)
2025-05-04 19:51 [PATCH 0/3] Fast device removal Kent Overstreet
2025-05-04 19:51 ` [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID Kent Overstreet
2025-05-04 19:51 ` [PATCH 2/3] bcachefs: bch2_dev_data_drop_by_backpointers() Kent Overstreet
2025-05-04 19:51 ` [PATCH 3/3] bcachefs: bcachefs_metadata_version_fast_device_removal Kent Overstreet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).