linux-bcachefs.vger.kernel.org archive mirror
* [PATCH 1/4] bcachefs: Ensure we don't return with closure on waitlist
@ 2025-07-18  6:47 Kent Overstreet
  2025-07-18  6:47 ` [PATCH 2/4] bcachefs: bch2_move_data() now walks btree nodes Kent Overstreet
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-07-18  6:47 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+0ea2c41a649240197795

Things will break badly if we return with a stack-allocated closure
still on a waitlist: the closure lives in a stack frame that's about to
go away, so the waitlist would be left pointing into freed stack memory.
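
In outline, the hazard looks like this (sketch only; alloc_blocking_example()
is a made-up stand-in for the stripe allocation path in ec.c):

  struct closure cl;
  int ret;

  closure_init_stack(&cl);

  do {
          /*
           * May park &cl on c->freelist_wait and return an error
           * matching BCH_ERR_operation_blocked:
           */
          ret = alloc_blocking_example(trans, &cl);

          /* "blocked" means: wait for the wakeup, then retry */
          if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
                  closure_sync(&cl);
  } while (bch2_err_matches(ret, BCH_ERR_operation_blocked));

  /*
   * On any other error the callee must have taken &cl back off the
   * waitlist - otherwise we return with the waitlist pointing into this
   * soon-to-be-dead stack frame, which is what the hunk below prevents
   * by waking c->freelist_wait on the error path.
   */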

Reported-by: syzbot+0ea2c41a649240197795@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/ec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 62dda821247e..bea14f02114f 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -2060,6 +2060,9 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
 	BUG_ON(trans->restarted);
 	return h;
 err:
+	if (waiting &&
+	    !bch2_err_matches(ret, BCH_ERR_operation_blocked))
+		closure_wake_up(&c->freelist_wait);
 	bch2_ec_stripe_head_put(c, h);
 	return ERR_PTR(ret);
 }
-- 
2.50.0



* [PATCH 2/4] bcachefs: bch2_move_data() now walks btree nodes
  2025-07-18  6:47 [PATCH 1/4] bcachefs: Ensure we don't return with closure on waitlist Kent Overstreet
@ 2025-07-18  6:47 ` Kent Overstreet
  2025-07-18  6:47 ` [PATCH 3/4] bcachefs: rereplicate flushes interior updates Kent Overstreet
  2025-07-18  6:47 ` [PATCH 4/4] bcachefs: can_use_btree_node() Kent Overstreet
  2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-07-18  6:47 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Kent Overstreet

bch2_move_data_btree() can already walk btree nodes (not just leaf
keys), and it properly handles btree roots - so the separate
bch2_move_btree() passes in bch2_data_job() are no longer needed.

This is a code cleanup - and it fixes the rereplicate2 test: we can now
reliably rereplicate in one pass.
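
The caller-visible effect (sketch, adapted from the bch2_data_job() hunk
below): a single bch2_move_data() call now covers both leaf extents and
interior btree nodes, so the separate bch2_move_btree() pass goes away.

  ret = bch2_move_data(c, start, end,
                       0,        /* min_depth: start at leaf keys */
                       NULL,     /* no rate limit */
                       stats,
                       writepoint_hashed((unsigned long) current),
                       true,     /* wait_on_copygc */
                       rereplicate_pred, c) ?: ret;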

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/move.c | 95 ++++++++++++++++++----------------------------
 fs/bcachefs/move.h | 12 ------
 2 files changed, 36 insertions(+), 71 deletions(-)

diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 3f44bb54f91a..3419e406f0c7 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -795,50 +795,50 @@ int bch2_move_data_btree(struct moving_context *ctxt,
 	return ret;
 }
 
-int __bch2_move_data(struct moving_context *ctxt,
-		     struct bbpos start,
-		     struct bbpos end,
-		     move_pred_fn pred, void *arg)
+static int bch2_move_data(struct bch_fs *c,
+			  struct bbpos start,
+			  struct bbpos end,
+			  unsigned min_depth,
+			  struct bch_ratelimit *rate,
+			  struct bch_move_stats *stats,
+			  struct write_point_specifier wp,
+			  bool wait_on_copygc,
+			  move_pred_fn pred, void *arg)
 {
-	struct bch_fs *c = ctxt->trans->c;
-	enum btree_id id;
 	int ret = 0;
 
-	for (id = start.btree;
+	struct moving_context ctxt;
+	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
+
+	for (enum btree_id id = start.btree;
 	     id <= min_t(unsigned, end.btree, btree_id_nr_alive(c) - 1);
 	     id++) {
-		ctxt->stats->pos = BBPOS(id, POS_MIN);
+		ctxt.stats->pos = BBPOS(id, POS_MIN);
 
-		if (!btree_type_has_ptrs(id) ||
-		    !bch2_btree_id_root(c, id)->b)
+		if (!bch2_btree_id_root(c, id)->b)
 			continue;
 
-		ret = bch2_move_data_btree(ctxt,
-				       id == start.btree ? start.pos : POS_MIN,
-				       id == end.btree   ? end.pos   : POS_MAX,
-				       pred, arg, id, 0);
+		unsigned min_depth_this_btree = min_depth;
+
+		if (!btree_type_has_ptrs(id))
+			min_depth_this_btree = max(min_depth_this_btree, 1);
+
+		for (unsigned level = min_depth_this_btree;
+		     level < BTREE_MAX_DEPTH;
+		     level++) {
+			ret = bch2_move_data_btree(&ctxt,
+						   id == start.btree ? start.pos : POS_MIN,
+						   id == end.btree   ? end.pos   : POS_MAX,
+						   pred, arg, id, level);
+			if (ret)
+				break;
+		}
+
 		if (ret)
 			break;
 	}
 
-	return ret;
-}
-
-int bch2_move_data(struct bch_fs *c,
-		   struct bbpos start,
-		   struct bbpos end,
-		   struct bch_ratelimit *rate,
-		   struct bch_move_stats *stats,
-		   struct write_point_specifier wp,
-		   bool wait_on_copygc,
-		   move_pred_fn pred, void *arg)
-{
-	struct moving_context ctxt;
-
-	bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc);
-	int ret = __bch2_move_data(&ctxt, start, end, pred, arg);
 	bch2_moving_ctxt_exit(&ctxt);
-
 	return ret;
 }
 
@@ -1206,14 +1206,6 @@ static bool migrate_pred(struct bch_fs *c, void *arg,
 	return data_opts->rewrite_ptrs != 0;
 }
 
-static bool rereplicate_btree_pred(struct bch_fs *c, void *arg,
-				   struct btree *b,
-				   struct bch_io_opts *io_opts,
-				   struct data_update_opts *data_opts)
-{
-	return rereplicate_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), io_opts, data_opts);
-}
-
 /*
  * Ancient versions of bcachefs produced packed formats which could represent
  * keys that the in memory format cannot represent; this checks for those
@@ -1293,15 +1285,6 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
 	return data_opts->kill_ptrs != 0;
 }
 
-static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg,
-				   struct btree *b,
-				   struct bch_io_opts *io_opts,
-				   struct data_update_opts *data_opts)
-{
-	return drop_extra_replicas_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key),
-					io_opts, data_opts);
-}
-
 static bool scrub_pred(struct bch_fs *c, void *_arg,
 		       enum btree_id btree, struct bkey_s_c k,
 		       struct bch_io_opts *io_opts,
@@ -1359,11 +1342,7 @@ int bch2_data_job(struct bch_fs *c,
 	case BCH_DATA_OP_rereplicate:
 		stats->data_type = BCH_DATA_journal;
 		ret = bch2_journal_flush_device_pins(&c->journal, -1);
-		ret = bch2_move_btree(c, start, end,
-				      rereplicate_btree_pred, c, stats) ?: ret;
-		ret = bch2_move_data(c, start, end,
-				     NULL,
-				     stats,
+		ret = bch2_move_data(c, start, end, 0, NULL, stats,
 				     writepoint_hashed((unsigned long) current),
 				     true,
 				     rereplicate_pred, c) ?: ret;
@@ -1389,12 +1368,10 @@ int bch2_data_job(struct bch_fs *c,
 		ret = bch2_scan_old_btree_nodes(c, stats);
 		break;
 	case BCH_DATA_OP_drop_extra_replicas:
-		ret = bch2_move_btree(c, start, end,
-				drop_extra_replicas_btree_pred, c, stats) ?: ret;
-		ret = bch2_move_data(c, start, end, NULL, stats,
-				writepoint_hashed((unsigned long) current),
-				true,
-				drop_extra_replicas_pred, c) ?: ret;
+		ret = bch2_move_data(c, start, end, 0, NULL, stats,
+				     writepoint_hashed((unsigned long) current),
+				     true,
+				     drop_extra_replicas_pred, c) ?: ret;
 		ret = bch2_replicas_gc2(c) ?: ret;
 		break;
 	default:
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index fe92ca6d418d..481026ff99ab 100644
--- a/fs/bcachefs/move.h
+++ b/fs/bcachefs/move.h
@@ -128,18 +128,6 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
 
 int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos,
 			 move_pred_fn, void *, enum btree_id, unsigned);
-int __bch2_move_data(struct moving_context *,
-		     struct bbpos,
-		     struct bbpos,
-		     move_pred_fn, void *);
-int bch2_move_data(struct bch_fs *,
-		   struct bbpos start,
-		   struct bbpos end,
-		   struct bch_ratelimit *,
-		   struct bch_move_stats *,
-		   struct write_point_specifier,
-		   bool,
-		   move_pred_fn, void *);
 
 int bch2_move_data_phys(struct bch_fs *, unsigned, u64, u64, unsigned,
 			struct bch_ratelimit *, struct bch_move_stats *,
-- 
2.50.0



* [PATCH 3/4] bcachefs: rereplicate flushes interior updates
  2025-07-18  6:47 [PATCH 1/4] bcachefs: Ensure we don't return with closure on waitlist Kent Overstreet
  2025-07-18  6:47 ` [PATCH 2/4] bcachefs: bch2_move_data() now walks btree nodes Kent Overstreet
@ 2025-07-18  6:47 ` Kent Overstreet
  2025-07-18  6:47 ` [PATCH 4/4] bcachefs: can_use_btree_node() Kent Overstreet
  2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-07-18  6:47 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Kent Overstreet

We need to flush interior btree updates before garbage collecting
replicas entries; otherwise we might still see replicas entries from
in-flight btree updates.
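
The resulting ordering in bch2_data_job()'s rereplicate case (sketch
matching the diff below):

  ret = bch2_move_data(c, start, end, 0, NULL, stats,
                       writepoint_hashed((unsigned long) current),
                       true,
                       rereplicate_pred, c) ?: ret;

  /* wait for in-flight interior node updates to complete ... */
  bch2_btree_interior_updates_flush(c);

  /* ... so replicas GC doesn't drop entries those updates still need */
  ret = bch2_replicas_gc2(c) ?: ret;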

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/move.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 3419e406f0c7..54dd6fec81db 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -1346,6 +1346,7 @@ int bch2_data_job(struct bch_fs *c,
 				     writepoint_hashed((unsigned long) current),
 				     true,
 				     rereplicate_pred, c) ?: ret;
+		bch2_btree_interior_updates_flush(c);
 		ret = bch2_replicas_gc2(c) ?: ret;
 		break;
 	case BCH_DATA_OP_migrate:
-- 
2.50.0



* [PATCH 4/4] bcachefs: can_use_btree_node()
  2025-07-18  6:47 [PATCH 1/4] bcachefs: Ensure we don't return with closure on waitlist Kent Overstreet
  2025-07-18  6:47 ` [PATCH 2/4] bcachefs: bch2_move_data() now walks btree nodes Kent Overstreet
  2025-07-18  6:47 ` [PATCH 3/4] bcachefs: rereplicate flushes interior updates Kent Overstreet
@ 2025-07-18  6:47 ` Kent Overstreet
  2 siblings, 0 replies; 4+ messages in thread
From: Kent Overstreet @ 2025-07-18  6:47 UTC (permalink / raw)
  To: linux-bcachefs; +Cc: Kent Overstreet

Make sure __bch2_btree_node_alloc() respects the allocation policy
(target and durability requirements) when reusing nodes from the btree
node reserve cache.
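
The check being added boils down to roughly this (sketch;
durability_in_target() is a made-up stand-in for the loop over
target_rw_devs() in can_use_btree_node() below):

  /*
   * A node cached in the btree reserve is only reusable if every device
   * it points to is rw, it's in the requested target, and its durability
   * either satisfies the reservation or already matches everything the
   * target's rw devices could provide:
   */
  bool usable = bch2_bkey_devs_rw(c, k) &&
                (!target || bch2_bkey_in_target(c, k, target)) &&
                (bch2_bkey_durability(c, k) >= res->nr_replicas ||
                 bch2_bkey_durability(c, k) >= durability_in_target(c, target));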

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_update_interior.c | 41 ++++++++++++++++++++++++++---
 fs/bcachefs/extents.c               | 14 ++++++++++
 fs/bcachefs/extents.h               |  2 ++
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 312ef203b27b..e4aa4fa749bc 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -14,6 +14,7 @@
 #include "btree_locking.h"
 #include "buckets.h"
 #include "clock.h"
+#include "disk_groups.h"
 #include "enumerated_ref.h"
 #include "error.h"
 #include "extents.h"
@@ -277,6 +278,36 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
 	bch2_trans_node_drop(trans, b);
 }
 
+static bool can_use_btree_node(struct bch_fs *c,
+			       struct disk_reservation *res,
+			       unsigned target,
+			       struct bkey_s_c k)
+{
+	if (!bch2_bkey_devs_rw(c, k))
+		return false;
+
+	if (target && !bch2_bkey_in_target(c, k, target))
+		return false;
+
+	unsigned durability = bch2_bkey_durability(c, k);
+
+	if (durability >= res->nr_replicas)
+		return true;
+
+	struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_btree, target);
+
+	guard(rcu)();
+
+	unsigned durability_available = 0, i;
+	for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) {
+		struct bch_dev *ca = bch2_dev_rcu_noerror(c, i);
+		if (ca)
+			durability_available += ca->mi.durability;
+	}
+
+	return durability >= durability_available;
+}
+
 static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
 					     struct disk_reservation *res,
 					     struct closure *cl,
@@ -303,10 +334,14 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
 	mutex_lock(&c->btree_reserve_cache_lock);
 	if (c->btree_reserve_cache_nr > nr_reserve) {
 		for (struct btree_alloc *a = c->btree_reserve_cache;
-		     a < c->btree_reserve_cache + c->btree_reserve_cache_nr;
-		     a++) {
-			if (target && !bch2_bkey_in_target(c, bkey_i_to_s_c(&a->k), target))
+		     a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) {
+			/* check if it has sufficient durability */
+
+			if (!can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) {
+				bch2_open_buckets_put(c, &a->ob);
+				*a = c->btree_reserve_cache[--c->btree_reserve_cache_nr];
 				continue;
+			}
 
 			bkey_copy(&b->key, &a->k);
 			b->ob = a->ob;
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index b36ecfc0ab9d..8152ef1cbbcd 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -1006,6 +1006,20 @@ const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned
 	return NULL;
 }
 
+bool bch2_bkey_devs_rw(struct bch_fs *c, struct bkey_s_c k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+	guard(rcu)();
+	bkey_for_each_ptr(ptrs, ptr) {
+		CLASS(bch2_dev_tryget, ca)(c, ptr->dev);
+		if (!ca || ca->mi.state != BCH_MEMBER_STATE_rw)
+			return false;
+	}
+
+	return true;
+}
+
 bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index f212f91c278d..35ee03cd5065 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -614,6 +614,8 @@ static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsig
 	return (void *) bch2_bkey_has_device_c(k.s_c, dev);
 }
 
+bool bch2_bkey_devs_rw(struct bch_fs *, struct bkey_s_c);
+
 bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned);
 bool bch2_bkey_in_target(struct bch_fs *, struct bkey_s_c, unsigned);
 
-- 
2.50.0


