From: Kent Overstreet <kent.overstreet@linux.dev>
To: linux-bcachefs@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Subject: [PATCH 8/8] bcachefs: bch2_check_bucket_backpointer_mismatch()
Date: Sat, 17 May 2025 15:25:45 -0400 [thread overview]
Message-ID: <20250517192547.3849149-9-kent.overstreet@linux.dev> (raw)
In-Reply-To: <20250517192547.3849149-1-kent.overstreet@linux.dev>
Detect buckets with missing backpointers, and run repair on demand.
__bch2_move_data_phys() now calls
bch2_check_bucket_backpointer_mismatch() on each bucket as it walks
them; this checks for missing backpointers by comparing the bucket's
backpointers against its sector counts.
When missing backpointers are detected, we kick off
bch2_check_extents_to_backpointers() asynchronously - right away if
we're trying to evacuate, or, if we're just running copygc, ratelimited
while fewer than 1/128th of the device's buckets are affected.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_background.c | 7 +++-
fs/bcachefs/backpointers.c | 75 +++++++++++++++++++++++++++++++---
fs/bcachefs/backpointers.h | 3 +-
fs/bcachefs/move.c | 21 ++++++++--
fs/bcachefs/movinggc.c | 3 ++
5 files changed, 98 insertions(+), 11 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 88e710ba2685..a38b9c6c891e 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -2175,8 +2175,11 @@ static int invalidate_one_bucket(struct btree_trans *trans,
BUG_ON(a->data_type != BCH_DATA_cached);
BUG_ON(a->dirty_sectors);
- if (!a->cached_sectors)
- bch_err(c, "invalidating empty bucket, confused");
+ if (!a->cached_sectors) {
+ bch2_check_bucket_backpointer_mismatch(trans, ca, bucket.offset,
+ true, last_flushed);
+ goto out;
+ }
unsigned cached_sectors = a->cached_sectors;
u8 gen = a->gen;
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 6b98ce1ed6c9..c08bc6685078 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -12,6 +12,7 @@
#include "disk_accounting.h"
#include "error.h"
#include "progress.h"
+#include "recovery_passes.h"
#include <linux/mm.h>
@@ -804,6 +805,13 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
return ret;
}
+static inline int bch2_fs_going_ro(struct bch_fs *c)
+{
+ return test_bit(BCH_FS_going_ro, &c->flags)
+ ? -EROFS
+ : 0;
+}
+
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
struct extents_to_bp_state *s)
{
@@ -831,6 +839,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
ret = for_each_btree_key_continue(trans, iter, 0, k, ({
bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
+ bch2_fs_going_ro(c) ?:
check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}));
@@ -870,6 +879,7 @@ static int data_type_to_alloc_counter(enum bch_data_type t)
static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
+ bool *had_mismatch,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
@@ -877,6 +887,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
bool need_commit = false;
+ *had_mismatch = false;
+
if (a->data_type == BCH_DATA_sb ||
a->data_type == BCH_DATA_journal ||
a->data_type == BCH_DATA_parity)
@@ -957,6 +969,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
? bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_empty,
alloc_k.k->p.offset)
: 0);
+
+ *had_mismatch = true;
}
err:
bch2_dev_put(ca);
@@ -1104,7 +1118,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k, ({
- check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
+ bool had_mismatch;
+ bch2_fs_going_ro(c) ?:
+ check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed);
}));
if (ret)
goto err;
@@ -1150,20 +1166,69 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
s.bp_start = bpos_successor(s.bp_end);
}
-err:
- bch2_trans_put(trans);
- bch2_bkey_buf_exit(&s.last_flushed, c);
- bch2_btree_cache_unpin(c);
for_each_member_device(c, ca) {
bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch);
bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
}
+err:
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&s.last_flushed, c);
+ bch2_btree_cache_unpin(c);
bch_err_fn(c, ret);
return ret;
}
+static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans,
+ struct bpos bucket,
+ bool *had_mismatch,
+ struct bkey_buf *last_flushed)
+{
+ struct btree_iter alloc_iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &alloc_iter,
+ BTREE_ID_alloc, bucket,
+ BTREE_ITER_cached);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ ret = check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed);
+ bch2_trans_iter_exit(trans, &alloc_iter);
+ return ret;
+}
+
+int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
+ struct bch_dev *ca, u64 bucket,
+ bool copygc,
+ struct bkey_buf *last_flushed)
+{
+ struct bch_fs *c = trans->c;
+ bool had_mismatch;
+ int ret = lockrestart_do(trans,
+ check_bucket_backpointer_pos_mismatch(trans, POS(ca->dev_idx, bucket),
+ &had_mismatch, last_flushed));
+ if (ret || !had_mismatch)
+ return ret;
+
+ u64 nr = ca->bucket_backpointer_mismatch.nr;
+ u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0;
+
+ struct printbuf buf = PRINTBUF;
+ __bch2_log_msg_start(ca->name, &buf);
+
+ prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n",
+ bucket, nr, ca->mi.nbuckets);
+
+ bch2_run_explicit_recovery_pass(c, &buf,
+ BCH_RECOVERY_PASS_check_extents_to_backpointers,
+ nr < allowed ? RUN_RECOVERY_PASS_ratelimit : 0);
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ return 0;
+}
+
/* backpointers -> extents */
static int check_one_backpointer(struct btree_trans *trans,
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index fe7149a2fbf5..6840561084ce 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -182,7 +182,8 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_b
struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
struct btree_iter *, struct bkey_buf *);
-int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bpos, struct bkey_buf *);
+int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bch_dev *, u64,
+ bool, struct bkey_buf *);
int bch2_check_btree_backpointers(struct bch_fs *);
int bch2_check_extents_to_backpointers(struct bch_fs *);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 49898d5743d4..0dd3bec3acff 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -815,6 +815,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
u64 bucket_start,
u64 bucket_end,
unsigned data_types,
+ bool copygc,
move_pred_fn pred, void *arg)
{
struct btree_trans *trans = ctxt->trans;
@@ -825,6 +826,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
struct bkey_buf sk;
struct bkey_s_c k;
struct bkey_buf last_flushed;
+ u64 check_mismatch_done = bucket_start;
int ret = 0;
struct bch_dev *ca = bch2_dev_tryget(c, dev);
@@ -835,8 +837,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
- bch2_dev_put(ca);
- ca = NULL;
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
@@ -871,6 +871,14 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
if (!k.k || bkey_gt(k.k->p, bp_end))
break;
+ if (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
+ while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
+ bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
+ copygc, &last_flushed);
+ }
+ continue;
+ }
+
if (k.k->type != KEY_TYPE_backpointer)
goto next;
@@ -946,10 +954,15 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
next:
bch2_btree_iter_advance(trans, &bp_iter);
}
+
+ while (check_mismatch_done < bucket_end)
+ bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
+ copygc, &last_flushed);
err:
bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&sk, c);
bch2_bkey_buf_exit(&last_flushed, c);
+ bch2_dev_put(ca);
return ret;
}
@@ -974,7 +987,8 @@ int bch2_move_data_phys(struct bch_fs *c,
ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
}
- int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
+ int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end,
+ data_types, false, pred, arg);
bch2_moving_ctxt_exit(&ctxt);
return ret;
@@ -1019,6 +1033,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
bucket.offset,
bucket.offset + 1,
~0,
+ true,
evacuate_bucket_pred, &arg);
}
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 0a751a65386f..7cb0b3d347b4 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -75,6 +75,9 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (!ca)
goto out;
+ if (bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b->k.bucket.offset))
+ goto out;
+
if (ca->mi.state != BCH_MEMBER_STATE_rw ||
!bch2_dev_is_online(ca))
goto out;
--
2.49.0
prev parent reply other threads:[~2025-05-17 19:26 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-17 19:25 [PATCH 0/8] Runtime self healing for missing backpointers Kent Overstreet
2025-05-17 19:25 ` [PATCH 1/8] bcachefs: struct bch_fs_recovery Kent Overstreet
2025-05-17 19:25 ` [PATCH 2/8] bcachefs: __bch2_run_recovery_passes() Kent Overstreet
2025-05-17 19:25 ` [PATCH 3/8] bcachefs: Reduce usage of recovery.curr_pass Kent Overstreet
2025-05-17 19:25 ` [PATCH 4/8] bcachefs: bch2_recovery_pass_status_to_text() Kent Overstreet
2025-05-17 19:25 ` [PATCH 5/8] bcachefs: bch2_run_explicit_recovery_pass() cleanup Kent Overstreet
2025-05-17 19:25 ` [PATCH 6/8] bcachefs: Run recovery passes asynchronously Kent Overstreet
2025-05-17 19:25 ` [PATCH 7/8] bcachefs: Improve bucket_bitmap code Kent Overstreet
2025-05-17 19:25 ` Kent Overstreet [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250517192547.3849149-9-kent.overstreet@linux.dev \
--to=kent.overstreet@linux.dev \
--cc=linux-bcachefs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.