linux-bcachefs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Fast device removal
@ 2025-05-04 19:51 Kent Overstreet
  2025-05-04 19:51 ` [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID Kent Overstreet
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-05-04 19:51 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Kent Overstreet

Previously, device removal had to do a full metadata scan to check for
pointers to the device being removed.

Instead, we can now walk backpointers - drastically faster on large
filesystems, particularly after we've already done an evacuate.

Since we don't fully trust backpointers, this requires an incompatible
upgrade - we need a sentinel value on member devices so that device
indexes aren't reused until after a fsck.

Kent Overstreet (3):
  bcachefs: BCH_SB_MEMBER_DELETED_UUID
  bcachefs: bch2_dev_data_drop_by_backpointers()
  bcachefs: bcachefs_metadata_version_fast_device_removal

 fs/bcachefs/bcachefs_format.h   |   3 +-
 fs/bcachefs/btree_gc.c          |   4 ++
 fs/bcachefs/migrate.c           | 107 +++++++++++++++++++++++++++-----
 fs/bcachefs/migrate.h           |   3 +-
 fs/bcachefs/sb-members.c        |  29 ++++++++-
 fs/bcachefs/sb-members.h        |   4 +-
 fs/bcachefs/sb-members_format.h |   4 ++
 fs/bcachefs/super.c             |  25 +++++++-
 8 files changed, 158 insertions(+), 21 deletions(-)

-- 
2.49.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID
  2025-05-04 19:51 [PATCH 0/3] Fast device removal Kent Overstreet
@ 2025-05-04 19:51 ` Kent Overstreet
  2025-05-04 19:51 ` [PATCH 2/3] bcachefs: bch2_dev_data_drop_by_backpointers() Kent Overstreet
  2025-05-04 19:51 ` [PATCH 3/3] bcachefs: bcachefs_metadata_version_fast_device_removal Kent Overstreet
  2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-05-04 19:51 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Kent Overstreet

Add a sentinel value for devices that have been removed, but whose index
we don't want to reuse until a fsck has completed.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_gc.c          |  4 ++++
 fs/bcachefs/sb-members.c        | 29 ++++++++++++++++++++++++++++-
 fs/bcachefs/sb-members.h        |  4 +++-
 fs/bcachefs/sb-members_format.h |  4 ++++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 92ae31737a24..dd08ec080313 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1079,6 +1079,10 @@ int bch2_check_allocations(struct bch_fs *c)
 	 * allocator thread - issue wakeup in case they blocked on gc_lock:
 	 */
 	closure_wake_up(&c->freelist_wait);
+
+	if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
+		bch2_sb_members_clean_deleted(c);
+
 	bch_err_fn(c, ret);
 	return ret;
 }
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 9c383d9a5f4d..e5c68c2f1655 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -525,6 +525,7 @@ int bch2_sb_member_alloc(struct bch_fs *c)
 	unsigned u64s;
 	int best = -1;
 	u64 best_last_mount = 0;
+	unsigned nr_deleted = 0;
 
 	if (dev_idx < BCH_SB_MEMBERS_MAX)
 		goto have_slot;
@@ -535,7 +536,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
 			continue;
 
 		struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
-		if (bch2_member_alive(&m))
+
+		nr_deleted += uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID);
+
+		if (!bch2_is_zero(&m.uuid, sizeof(m.uuid)))
 			continue;
 
 		u64 last_mount = le64_to_cpu(m.last_mount);
@@ -549,6 +553,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
 		goto have_slot;
 	}
 
+	if (nr_deleted)
+		bch_err(c, "unable to allocate new member, but have %u deleted: run fsck",
+			nr_deleted);
+
 	return -BCH_ERR_ENOSPC_sb_members;
 have_slot:
 	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
@@ -564,3 +572,22 @@ int bch2_sb_member_alloc(struct bch_fs *c)
 	c->disk_sb.sb->nr_devices = nr_devices;
 	return dev_idx;
 }
+
+void bch2_sb_members_clean_deleted(struct bch_fs *c)
+{
+	mutex_lock(&c->sb_lock);
+	bool write_sb = false;
+
+	for (unsigned i = 0; i < c->sb.nr_devices; i++) {
+		struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, i);
+
+		if (uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID)) {
+			memset(&m->uuid, 0, sizeof(m->uuid));
+			write_sb = true;
+		}
+	}
+
+	if (write_sb)
+		bch2_write_super(c);
+	mutex_unlock(&c->sb_lock);
+}
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index c9cb8f7657b0..6bd9b86aee5b 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -320,7 +320,8 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
 
 static inline bool bch2_member_alive(struct bch_member *m)
 {
-	return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
+	return  !bch2_is_zero(&m->uuid, sizeof(m->uuid)) &&
+		!uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID);
 }
 
 static inline bool bch2_member_exists(struct bch_sb *sb, unsigned dev)
@@ -381,5 +382,6 @@ bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
 void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
 
 int bch2_sb_member_alloc(struct bch_fs *);
+void bch2_sb_members_clean_deleted(struct bch_fs *);
 
 #endif /* _BCACHEFS_SB_MEMBERS_H */
diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h
index 472218a59102..fb72ad730518 100644
--- a/fs/bcachefs/sb-members_format.h
+++ b/fs/bcachefs/sb-members_format.h
@@ -13,6 +13,10 @@
  */
 #define BCH_SB_MEMBER_INVALID		255
 
+#define BCH_SB_MEMBER_DELETED_UUID					\
+	UUID_INIT(0xffffffff, 0xffff, 0xffff,				\
+		  0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
+
 #define BCH_MIN_NR_NBUCKETS	(1 << 6)
 
 #define BCH_IOPS_MEASUREMENTS()			\
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] bcachefs: bch2_dev_data_drop_by_backpointers()
  2025-05-04 19:51 [PATCH 0/3] Fast device removal Kent Overstreet
  2025-05-04 19:51 ` [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID Kent Overstreet
@ 2025-05-04 19:51 ` Kent Overstreet
  2025-05-04 19:51 ` [PATCH 3/3] bcachefs: bcachefs_metadata_version_fast_device_removal Kent Overstreet
  2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-05-04 19:51 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Kent Overstreet

Currently, device removal has to scan all metadata for pointers to the
device being removed.

Add a new method, with the same interface as bch2_dev_data_drop(), that
scans by backpointers instead - this will drastically speed up device
removal.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/migrate.c | 107 ++++++++++++++++++++++++++++++++++++------
 fs/bcachefs/migrate.h |   3 +-
 2 files changed, 95 insertions(+), 15 deletions(-)

diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 90dcf80bd64a..f431586a971f 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -4,9 +4,11 @@
  */
 
 #include "bcachefs.h"
+#include "backpointers.h"
 #include "bkey_buf.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
+#include "btree_write_buffer.h"
 #include "buckets.h"
 #include "errcode.h"
 #include "extents.h"
@@ -20,7 +22,7 @@
 #include "super-io.h"
 
 static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
-			 unsigned dev_idx, int flags, bool metadata)
+			 unsigned dev_idx, unsigned flags, bool metadata)
 {
 	unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
 	unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
@@ -37,11 +39,28 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
 	return 0;
 }
 
+static int drop_btree_ptrs(struct btree_trans *trans, struct btree_iter *iter,
+			   struct btree *b, unsigned dev_idx, unsigned flags)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_buf k;
+
+	bch2_bkey_buf_init(&k);
+	bch2_bkey_buf_copy(&k, c, &b->key);
+
+	int ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), dev_idx, flags, true) ?:
+		bch2_btree_node_update_key(trans, iter, b, k.k, 0, false);
+
+	bch_err_fn(c, ret);
+	bch2_bkey_buf_exit(&k, c);
+	return ret;
+}
+
 static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
 				     struct btree_iter *iter,
 				     struct bkey_s_c k,
 				     unsigned dev_idx,
-				     int flags)
+				     unsigned flags)
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_i *n;
@@ -77,9 +96,27 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
 	return 0;
 }
 
+static int bch2_dev_btree_drop_key(struct btree_trans *trans,
+				   struct bkey_s_c_backpointer bp,
+				   unsigned dev_idx,
+				   struct bkey_buf *last_flushed,
+				   unsigned flags)
+{
+	struct btree_iter iter;
+	struct btree *b = bch2_backpointer_get_node(trans, bp, &iter, last_flushed);
+	int ret = PTR_ERR_OR_ZERO(b);
+	if (ret)
+		return ret == -BCH_ERR_backpointer_to_overwritten_btree_node ? 0 : ret;
+
+	ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
+
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
 static int bch2_dev_usrdata_drop(struct bch_fs *c,
 				 struct progress_indicator_state *progress,
-				 unsigned dev_idx, int flags)
+				 unsigned dev_idx, unsigned flags)
 {
 	struct btree_trans *trans = bch2_trans_get(c);
 	enum btree_id id;
@@ -106,7 +143,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
 
 static int bch2_dev_metadata_drop(struct bch_fs *c,
 				  struct progress_indicator_state *progress,
-				  unsigned dev_idx, int flags)
+				  unsigned dev_idx, unsigned flags)
 {
 	struct btree_trans *trans;
 	struct btree_iter iter;
@@ -137,20 +174,12 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
 			if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
 				goto next;
 
-			bch2_bkey_buf_copy(&k, c, &b->key);
-
-			ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
-					    dev_idx, flags, true);
-			if (ret)
-				break;
-
-			ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
+			ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
 			if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
 				ret = 0;
 				continue;
 			}
 
-			bch_err_msg(c, ret, "updating btree node key");
 			if (ret)
 				break;
 next:
@@ -176,7 +205,57 @@ static int bch2_dev_metadata_drop(struct bch_fs *c,
 	return ret;
 }
 
-int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx,
+			struct bkey_s_c_backpointer bp, struct bkey_buf *last_flushed,
+			unsigned flags)
+{
+	struct btree_iter iter;
+	struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed);
+	int ret = bkey_err(k);
+	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+		return 0;
+	if (ret)
+		return ret;
+
+	if (!bch2_bkey_has_device_c(k, dev_idx))
+		goto out;
+
+	ret = bkey_is_btree_ptr(k.k)
+		? bch2_dev_btree_drop_key(trans, bp, dev_idx, last_flushed, flags)
+		: bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags);
+out:
+	bch2_trans_iter_exit(trans, &iter);
+	return ret;
+}
+
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags)
+{
+	struct btree_trans *trans = bch2_trans_get(c);
+
+	struct bkey_buf last_flushed;
+	bch2_bkey_buf_init(&last_flushed);
+	bkey_init(&last_flushed.k->k);
+
+	int ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+		for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
+				POS(dev_idx, 0),
+				POS(dev_idx, U64_MAX), 0, k,
+				NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+			if (k.k->type != KEY_TYPE_backpointer)
+				continue;
+
+			data_drop_bp(trans, dev_idx, bkey_s_c_to_backpointer(k),
+				     &last_flushed, flags);
+
+	}));
+
+	bch2_bkey_buf_exit(&last_flushed, trans->c);
+	bch2_trans_put(trans);
+	bch_err_fn(c, ret);
+	return ret;
+}
+
+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, unsigned flags)
 {
 	struct progress_indicator_state progress;
 	bch2_progress_init(&progress, c,
diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h
index 027efaa0d575..30018140711b 100644
--- a/fs/bcachefs/migrate.h
+++ b/fs/bcachefs/migrate.h
@@ -2,6 +2,7 @@
 #ifndef _BCACHEFS_MIGRATE_H
 #define _BCACHEFS_MIGRATE_H
 
-int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *, unsigned, unsigned);
+int bch2_dev_data_drop(struct bch_fs *, unsigned, unsigned);
 
 #endif /* _BCACHEFS_MIGRATE_H */
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] bcachefs: bcachefs_metadata_version_fast_device_removal
  2025-05-04 19:51 [PATCH 0/3] Fast device removal Kent Overstreet
  2025-05-04 19:51 ` [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID Kent Overstreet
  2025-05-04 19:51 ` [PATCH 2/3] bcachefs: bch2_dev_data_drop_by_backpointers() Kent Overstreet
@ 2025-05-04 19:51 ` Kent Overstreet
  2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-05-04 19:51 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Kent Overstreet

Add fast device removal, which uses backpointers to find pointers to the
device being removed instead of doing a full metadata scan.

This requires BCH_SB_MEMBER_DELETED_UUID, which is an incompatible
change - hence the version number bump. We don't fully trust
backpointers, so we don't want to reuse device indexes until after a
fsck has verified that there aren't any pointers to removed devices.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h |  3 ++-
 fs/bcachefs/super.c           | 25 ++++++++++++++++++++++---
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 0beff6af7ecf..baaf9786238b 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -696,7 +696,8 @@ struct bch_sb_field_ext {
 	x(stripe_lru,			BCH_VERSION(1, 23))		\
 	x(casefolding,			BCH_VERSION(1, 24))		\
 	x(extent_flags,			BCH_VERSION(1, 25))		\
-	x(snapshot_deletion_v2,		BCH_VERSION(1, 26))
+	x(snapshot_deletion_v2,		BCH_VERSION(1, 26))		\
+	x(fast_device_removal,		BCH_VERSION(1, 27))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 35b07410a8c6..18d8823cdb79 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1719,6 +1719,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 {
 	struct bch_member *m;
 	unsigned dev_idx = ca->dev_idx, data;
+	bool fast_device_removal = !bch2_request_incompat_feature(c,
+					bcachefs_metadata_version_fast_device_removal);
 	int ret;
 
 	down_write(&c->state_lock);
@@ -1737,11 +1739,24 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 
 	__bch2_dev_read_only(c, ca);
 
-	ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
-	bch_err_msg(ca, ret, "bch2_dev_data_drop()");
+	ret = fast_device_removal
+		? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags)
+		: bch2_dev_data_drop(c, ca->dev_idx, flags);
 	if (ret)
 		goto err;
 
+	/* Check if device still has data */
+	struct bch_dev_usage usage = bch2_dev_usage_read(ca);
+	for (unsigned i = 0; i < BCH_DATA_NR; i++)
+		if (!data_type_is_empty(i) &&
+		    !data_type_is_hidden(i) &&
+		    usage.buckets[i]) {
+			bch_err(ca, "Remove failed: still has data (%s, %llu buckets)",
+				__bch2_data_types[i], usage.buckets[i]);
+			ret = -EBUSY;
+			goto err;
+		}
+
 	ret = bch2_dev_remove_alloc(c, ca);
 	bch_err_msg(ca, ret, "bch2_dev_remove_alloc()");
 	if (ret)
@@ -1805,7 +1820,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
 	 */
 	mutex_lock(&c->sb_lock);
 	m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
-	memset(&m->uuid, 0, sizeof(m->uuid));
+
+	if (fast_device_removal)
+		m->uuid = BCH_SB_MEMBER_DELETED_UUID;
+	else
+		memset(&m->uuid, 0, sizeof(m->uuid));
 
 	bch2_write_super(c);
 
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-05-04 19:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-05-04 19:51 [PATCH 0/3] Fast device removal Kent Overstreet
2025-05-04 19:51 ` [PATCH 1/3] bcachefs: BCH_SB_MEMBER_DELETED_UUID Kent Overstreet
2025-05-04 19:51 ` [PATCH 2/3] bcachefs: bch2_dev_data_drop_by_backpointers() Kent Overstreet
2025-05-04 19:51 ` [PATCH 3/3] bcachefs: bcachefs_metadata_version_fast_device_removal Kent Overstreet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).