* [PATCH 01/34] bcachefs: BCH_ERR_btree_node_read_error_cached
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 02/34] bcachefs: Use separate rhltable for bch2_inode_or_descendents_is_open() Kent Overstreet
` (32 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_cache.c | 6 +++---
fs/bcachefs/errcode.h | 1 +
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index a0a406b0c7bc..36dfa6a48aa6 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -1131,7 +1131,7 @@ static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btr
if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type);
- return ERR_PTR(-BCH_ERR_btree_node_read_error);
+ return ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
}
EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1221,7 +1221,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
if (unlikely(btree_node_read_error(b))) {
six_unlock_type(&b->c.lock, lock_type);
- return ERR_PTR(-BCH_ERR_btree_node_read_error);
+ return ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
}
EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1303,7 +1303,7 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
if (unlikely(btree_node_read_error(b))) {
six_unlock_read(&b->c.lock);
- b = ERR_PTR(-BCH_ERR_btree_node_read_error);
+ b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
goto out;
}
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 2dda7f962e5b..131b9bef21a0 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -242,6 +242,7 @@
x(BCH_ERR_invalid, invalid_bkey) \
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \
+ x(BCH_ERR_btree_node_read_err, btree_node_read_err_cached) \
x(EIO, sb_not_downgraded) \
x(EIO, btree_node_write_all_failed) \
x(EIO, btree_node_read_error) \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 02/34] bcachefs: Use separate rhltable for bch2_inode_or_descendents_is_open()
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
2024-11-29 20:27 ` [PATCH 01/34] bcachefs: BCH_ERR_btree_node_read_error_cached Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 03/34] bcachefs: errcode cleanup: journal errors Kent Overstreet
` (31 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/bcachefs.h | 1 +
fs/bcachefs/fs.c | 39 ++++++++++++++++++++++++++++++---------
fs/bcachefs/fs.h | 1 +
3 files changed, 32 insertions(+), 9 deletions(-)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 11f9ed42a9da..f1d8c821d27a 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -1020,6 +1020,7 @@ struct bch_fs {
struct list_head vfs_inodes_list;
struct mutex vfs_inodes_lock;
struct rhashtable vfs_inodes_table;
+ struct rhltable vfs_inodes_by_inum_table;
/* VFS IO PATH - fs-io.c */
struct bio_set writepage_bioset;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 50d323fca001..c6e7df7c67fa 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -39,6 +39,7 @@
#include <linux/posix_acl.h>
#include <linux/random.h>
#include <linux/seq_file.h>
+#include <linux/siphash.h>
#include <linux/statfs.h>
#include <linux/string.h>
#include <linux/xattr.h>
@@ -176,8 +177,9 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed)
{
const subvol_inum *inum = data;
+ siphash_key_t k = { .key[0] = seed };
- return jhash(&inum->inum, sizeof(inum->inum), seed);
+ return siphash_2u64(inum->subvol, inum->inum, &k);
}
static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed)
@@ -206,11 +208,18 @@ static const struct rhashtable_params bch2_vfs_inodes_params = {
.automatic_shrinking = true,
};
+static const struct rhashtable_params bch2_vfs_inodes_by_inum_params = {
+ .head_offset = offsetof(struct bch_inode_info, by_inum_hash),
+ .key_offset = offsetof(struct bch_inode_info, ei_inum.inum),
+ .key_len = sizeof(u64),
+ .automatic_shrinking = true,
+};
+
int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
{
struct bch_fs *c = trans->c;
- struct rhashtable *ht = &c->vfs_inodes_table;
- subvol_inum inum = (subvol_inum) { .inum = p.offset };
+ struct rhltable *ht = &c->vfs_inodes_by_inum_table;
+ u64 inum = p.offset;
DARRAY(u32) subvols;
int ret = 0;
@@ -235,15 +244,15 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
struct rhash_lock_head __rcu *const *bkt;
struct rhash_head *he;
unsigned int hash;
- struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
+ struct bucket_table *tbl = rht_dereference_rcu(ht->ht.tbl, &ht->ht);
restart:
- hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params);
+ hash = rht_key_hashfn(&ht->ht, tbl, &inum, bch2_vfs_inodes_by_inum_params);
bkt = rht_bucket(tbl, hash);
do {
struct bch_inode_info *inode;
rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) {
- if (inode->ei_inum.inum == inum.inum) {
+ if (inode->ei_inum.inum == inum) {
ret = darray_push_gfp(&subvols, inode->ei_inum.subvol,
GFP_NOWAIT|__GFP_NOWARN);
if (ret) {
@@ -264,7 +273,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
/* Ensure we see any new tables. */
smp_rmb();
- tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ tbl = rht_dereference_rcu(tbl->future_tbl, &ht->ht);
if (unlikely(tbl))
goto restart;
rcu_read_unlock();
@@ -343,7 +352,11 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod
spin_unlock(&inode->v.i_lock);
if (remove) {
- int ret = rhashtable_remove_fast(&c->vfs_inodes_table,
+ int ret = rhltable_remove(&c->vfs_inodes_by_inum_table,
+ &inode->by_inum_hash, bch2_vfs_inodes_by_inum_params);
+ BUG_ON(ret);
+
+ ret = rhashtable_remove_fast(&c->vfs_inodes_table,
&inode->hash, bch2_vfs_inodes_params);
BUG_ON(ret);
inode->v.i_hash.pprev = NULL;
@@ -388,6 +401,11 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
discard_new_inode(&inode->v);
return old;
} else {
+ int ret = rhltable_insert(&c->vfs_inodes_by_inum_table,
+ &inode->by_inum_hash,
+ bch2_vfs_inodes_by_inum_params);
+ BUG_ON(ret);
+
inode_fake_hash(&inode->v);
inode_sb_list_add(&inode->v);
@@ -2359,13 +2377,16 @@ static int bch2_init_fs_context(struct fs_context *fc)
void bch2_fs_vfs_exit(struct bch_fs *c)
{
+ if (c->vfs_inodes_by_inum_table.ht.tbl)
+ rhltable_destroy(&c->vfs_inodes_by_inum_table);
if (c->vfs_inodes_table.tbl)
rhashtable_destroy(&c->vfs_inodes_table);
}
int bch2_fs_vfs_init(struct bch_fs *c)
{
- return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params);
+ return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params) ?:
+ rhltable_init(&c->vfs_inodes_by_inum_table, &bch2_vfs_inodes_by_inum_params);
}
static struct file_system_type bcache_fs_type = {
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index 59f9f7ae728d..dd2198541455 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -14,6 +14,7 @@
struct bch_inode_info {
struct inode v;
struct rhash_head hash;
+ struct rhlist_head by_inum_hash;
subvol_inum ei_inum;
struct list_head ei_vfs_inode_list;
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 03/34] bcachefs: errcode cleanup: journal errors
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
2024-11-29 20:27 ` [PATCH 01/34] bcachefs: BCH_ERR_btree_node_read_error_cached Kent Overstreet
2024-11-29 20:27 ` [PATCH 02/34] bcachefs: Use separate rhltable for bch2_inode_or_descendents_is_open() Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 04/34] bcachefs: disk_accounting: bch2_dev_rcu -> bch2_dev_rcu_noerror Kent Overstreet
` (30 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Instead of throwing standard error codes, we should be throwing
dedicated private error codes; this greatly improves debuggability.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/errcode.h | 2 ++
fs/bcachefs/journal.c | 4 ++--
fs/bcachefs/journal.h | 2 +-
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 131b9bef21a0..c989ce4f715f 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -241,6 +241,8 @@
x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \
x(BCH_ERR_invalid, invalid_bkey) \
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
+ x(EIO, journal_shutdown) \
+ x(EIO, journal_flush_err) \
x(EIO, btree_node_read_err) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_cached) \
x(EIO, sb_not_downgraded) \
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 699db0d0749a..bbdd0b17ae69 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -673,7 +673,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
* @seq: seq to flush
* @parent: closure object to wait with
* Returns: 1 if @seq has already been flushed, 0 if @seq is being flushed,
- * -EIO if @seq will never be flushed
+ * -BCH_ERR_journal_flush_err if @seq will never be flushed
*
* Like bch2_journal_wait_on_seq, except that it triggers a write immediately if
* necessary
@@ -696,7 +696,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq,
/* Recheck under lock: */
if (j->err_seq && seq >= j->err_seq) {
- ret = -EIO;
+ ret = -BCH_ERR_journal_flush_err;
goto out;
}
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index 71a50846967f..a6a2e888c59b 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -412,7 +412,7 @@ void bch2_journal_halt(struct journal *);
static inline int bch2_journal_error(struct journal *j)
{
return j->reservations.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL
- ? -EIO : 0;
+ ? -BCH_ERR_journal_shutdown : 0;
}
struct bch_dev;
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 04/34] bcachefs: disk_accounting: bch2_dev_rcu -> bch2_dev_rcu_noerror
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (2 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 03/34] bcachefs: errcode cleanup: journal errors Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 05/34] bcachefs: Fix accounting_read when we rewind Kent Overstreet
` (29 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Accounting keys that reference invalid devices are corrected by fsck;
they shouldn't cause an emergency shutdown.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/disk_accounting.c | 4 ++--
fs/bcachefs/disk_accounting.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index 55a00018dc8b..fa821d278c45 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -805,7 +805,7 @@ int bch2_accounting_read(struct bch_fs *c)
break;
case BCH_DISK_ACCOUNTING_dev_data_type:
rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu(c, k.dev_data_type.dev);
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev);
if (ca) {
struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type];
percpu_u64_set(&d->buckets, v[0]);
@@ -911,7 +911,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
break;
case BCH_DISK_ACCOUNTING_dev_data_type: {
rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev);
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
if (!ca) {
rcu_read_unlock();
continue;
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index 6639535dc91c..8b2b2f83e6a4 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -142,7 +142,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans,
break;
case BCH_DISK_ACCOUNTING_dev_data_type:
rcu_read_lock();
- struct bch_dev *ca = bch2_dev_rcu(c, acc_k.dev_data_type.dev);
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
if (ca) {
this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].buckets, a.v->d[0]);
this_cpu_add(ca->usage->d[acc_k.dev_data_type.data_type].sectors, a.v->d[1]);
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 05/34] bcachefs: Fix accounting_read when we rewind
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (3 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 04/34] bcachefs: disk_accounting: bch2_dev_rcu -> bch2_dev_rcu_noerror Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 06/34] bcachefs: backpointer_to_missing_ptr is now autofix Kent Overstreet
` (28 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
If we rewind recovery to run topology repair, that causes
accounting_read to run twice.
This fixes accounting being double counted.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/disk_accounting.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index fa821d278c45..bb5dbbf71d04 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -700,6 +700,21 @@ int bch2_accounting_read(struct bch_fs *c)
struct btree_trans *trans = bch2_trans_get(c);
struct printbuf buf = PRINTBUF;
+ /*
+ * We might run more than once if we rewind to start topology repair or
+ * btree node scan - and those might cause us to get different results,
+ * so we can't just skip if we've already run.
+ *
+ * Instead, zero out any accounting we have:
+ */
+ percpu_down_write(&c->mark_lock);
+ darray_for_each(acc->k, e)
+ percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters);
+ for_each_member_device(c, ca)
+ percpu_memset(ca->usage, 0, sizeof(*ca->usage));
+ percpu_memset(c->usage, 0, sizeof(*c->usage));
+ percpu_up_write(&c->mark_lock);
+
int ret = for_each_btree_key(trans, iter,
BTREE_ID_accounting, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 06/34] bcachefs: backpointer_to_missing_ptr is now autofix
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (4 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 05/34] bcachefs: Fix accounting_read when we rewind Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 07/34] bcachefs: Fix btree node scan when unknown btree IDs are present Kent Overstreet
` (27 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/sb-errors_format.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 9e3425f533bc..d45d0789f1b1 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -141,7 +141,7 @@ enum bch_fsck_flags {
x(backpointer_dev_bad, 297, 0) \
x(backpointer_to_missing_device, 126, 0) \
x(backpointer_to_missing_alloc, 127, 0) \
- x(backpointer_to_missing_ptr, 128, 0) \
+ x(backpointer_to_missing_ptr, 128, FSCK_AUTOFIX) \
x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \
x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \
x(lru_entry_bad, 131, FSCK_AUTOFIX) \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 07/34] bcachefs: Fix btree node scan when unknown btree IDs are present
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (5 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 06/34] bcachefs: backpointer_to_missing_ptr is now autofix Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 08/34] bcachefs: Kill bch2_bucket_alloc_new_fs() Kent Overstreet
` (26 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+1f202d4da221ec6ebf8e
btree_root entries for unknown btree IDs are created during recovery,
before reading those btree roots.
But btree_node_scan may find btree nodes with unknown btree IDs when we
haven't seen roots for those btrees.
Reported-by: syzbot+1f202d4da221ec6ebf8e@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_cache.c | 11 ++++++++---
fs/bcachefs/btree_cache.h | 9 +++++++--
2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 36dfa6a48aa6..1f06e24e53fc 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -1406,9 +1406,14 @@ void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsi
void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
bch2_btree_id_to_text(out, b->c.btree_id);
- prt_printf(out, " level %u/%u\n ",
- b->c.level,
- bch2_btree_id_root(c, b->c.btree_id)->level);
+ prt_printf(out, " level %u/", b->c.level);
+ struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id);
+ if (r)
+ prt_printf(out, "%u", r->level);
+ else
+ prt_printf(out, "(unknown)");
+ prt_printf(out, "\n ");
+
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key));
}
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 6cfacacb6769..dcc34fe4996d 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -128,14 +128,19 @@ static inline struct btree_root *bch2_btree_id_root(struct bch_fs *c, unsigned i
} else {
unsigned idx = id - BTREE_ID_NR;
- EBUG_ON(idx >= c->btree_roots_extra.nr);
+ /* This can happen when we're called from btree_node_scan */
+ if (idx >= c->btree_roots_extra.nr)
+ return NULL;
+
return &c->btree_roots_extra.data[idx];
}
}
static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b)
{
- return bch2_btree_id_root(c, b->c.btree_id)->b;
+ struct btree_root *r = bch2_btree_id_root(c, b->c.btree_id);
+
+ return r ? r->b : NULL;
}
const char *bch2_btree_id_str(enum btree_id); /* avoid */
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 08/34] bcachefs: Kill bch2_bucket_alloc_new_fs()
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (6 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 07/34] bcachefs: Fix btree node scan when unknown btree IDs are present Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 09/34] bcachefs: Bad btree roots are now autofix Kent Overstreet
` (25 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+592425844580a6598410
The early-early allocation path, bch2_bucket_alloc_new_fs(), is no
longer needed - and inconsistencies around new_fs_bucket_idx have been a
frequent source of bugs.
Reported-by: syzbot+592425844580a6598410@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_foreground.c | 40 ++++++++++++++--------------------
fs/bcachefs/alloc_foreground.h | 2 --
fs/bcachefs/bcachefs.h | 1 -
fs/bcachefs/buckets.c | 25 +++++++++++++++++++++
fs/bcachefs/buckets.h | 21 +-----------------
fs/bcachefs/journal.c | 34 +++++++++++++----------------
fs/bcachefs/journal_reclaim.c | 3 +++
fs/bcachefs/recovery.c | 5 +----
fs/bcachefs/super.c | 12 +++++-----
9 files changed, 66 insertions(+), 77 deletions(-)
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 6d665b720f72..4d1ff7f1f302 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -156,6 +156,14 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
return ob;
}
+static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
+{
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_trans_mark_dev_sbs)
+ return false;
+
+ return bch2_is_superblock_bucket(ca, b);
+}
+
static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
{
BUG_ON(c->open_buckets_partial_nr >=
@@ -175,20 +183,6 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
closure_wake_up(&c->freelist_wait);
}
-/* _only_ for allocating the journal on a new device: */
-long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
-{
- while (ca->new_fs_bucket_idx < ca->mi.nbuckets) {
- u64 b = ca->new_fs_bucket_idx++;
-
- if (!is_superblock_bucket(ca, b) &&
- (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse)))
- return b;
- }
-
- return -1;
-}
-
static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{
switch (watermark) {
@@ -214,6 +208,9 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
{
struct open_bucket *ob;
+ if (unlikely(is_superblock_bucket(c, ca, bucket)))
+ return NULL;
+
if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
s->skipped_nouse++;
return NULL;
@@ -295,9 +292,6 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
/*
* This path is for before the freespace btree is initialized:
- *
- * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
- * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
*/
static noinline struct open_bucket *
bch2_bucket_alloc_early(struct btree_trans *trans,
@@ -309,7 +303,7 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
struct btree_iter iter, citer;
struct bkey_s_c k, ck;
struct open_bucket *ob = NULL;
- u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+ u64 first_bucket = ca->mi.first_bucket;
u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap];
u64 alloc_start = max(first_bucket, *dev_alloc_cursor);
u64 alloc_cursor = alloc_start;
@@ -332,10 +326,6 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
break;
- if (ca->new_fs_bucket_idx &&
- is_superblock_bucket(ca, k.k->p.offset))
- continue;
-
if (s->btree_bitmap != BTREE_BITMAP_ANY &&
s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
@@ -406,8 +396,6 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor));
u64 alloc_cursor = alloc_start;
int ret;
-
- BUG_ON(ca->new_fs_bucket_idx);
again:
for_each_btree_key_max_norestart(trans, iter, BTREE_ID_freespace,
POS(ca->dev_idx, alloc_cursor),
@@ -551,6 +539,10 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
bch2_dev_do_invalidates(ca);
if (!avail) {
+ if (watermark > BCH_WATERMARK_normal &&
+ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
+ goto alloc;
+
if (cl && !waiting) {
closure_wait(&c->freelist_wait, cl);
waiting = true;
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 1a16fd5bd4f8..4f87745df97e 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -28,8 +28,6 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *,
struct bch_devs_mask *);
void bch2_dev_stripe_increment(struct bch_dev *, struct dev_stripe_state *);
-long bch2_bucket_alloc_new_fs(struct bch_dev *);
-
static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob)
{
return bch2_dev_have_ref(c, ob->dev);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f1d8c821d27a..a85b3bcc6383 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -560,7 +560,6 @@ struct bch_dev {
struct bch_dev_usage __percpu *usage;
/* Allocator: */
- u64 new_fs_bucket_idx;
u64 alloc_cursor[3];
unsigned nr_open_buckets;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 1547141ba2a0..afd35c93fcfb 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1161,6 +1161,31 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c)
return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional);
}
+bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b)
+{
+ struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+ u64 b_offset = bucket_to_sector(ca, b);
+ u64 b_end = bucket_to_sector(ca, b + 1);
+ unsigned i;
+
+ if (!b)
+ return true;
+
+ for (i = 0; i < layout->nr_superblocks; i++) {
+ u64 offset = le64_to_cpu(layout->sb_offset[i]);
+ u64 end = offset + (1 << layout->sb_max_size_bits);
+
+ if (!(offset >= b_end || end <= b_offset))
+ return true;
+ }
+
+ for (i = 0; i < ca->journal.nr; i++)
+ if (b == ca->journal.buckets[i])
+ return true;
+
+ return false;
+}
+
/* Disk reservations: */
#define SECTORS_CACHE 1024
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index ccc78bfe2fd4..3bebc4c3044f 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -308,26 +308,7 @@ int bch2_trans_mark_dev_sbs_flags(struct bch_fs *,
enum btree_iter_update_trigger_flags);
int bch2_trans_mark_dev_sbs(struct bch_fs *);
-static inline bool is_superblock_bucket(struct bch_dev *ca, u64 b)
-{
- struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
- u64 b_offset = bucket_to_sector(ca, b);
- u64 b_end = bucket_to_sector(ca, b + 1);
- unsigned i;
-
- if (!b)
- return true;
-
- for (i = 0; i < layout->nr_superblocks; i++) {
- u64 offset = le64_to_cpu(layout->sb_offset[i]);
- u64 end = offset + (1 << layout->sb_max_size_bits);
-
- if (!(offset >= b_end || end <= b_offset))
- return true;
- }
-
- return false;
-}
+bool bch2_is_superblock_bucket(struct bch_dev *, u64);
static inline const char *bch2_data_type_str(enum bch_data_type type)
{
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index bbdd0b17ae69..95cccda3b22c 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1002,19 +1002,17 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
}
for (nr_got = 0; nr_got < nr_want; nr_got++) {
- if (new_fs) {
- bu[nr_got] = bch2_bucket_alloc_new_fs(ca);
- if (bu[nr_got] < 0) {
- ret = -BCH_ERR_ENOSPC_bucket_alloc;
- break;
- }
- } else {
- ob[nr_got] = bch2_bucket_alloc(c, ca, BCH_WATERMARK_normal,
- BCH_DATA_journal, cl);
- ret = PTR_ERR_OR_ZERO(ob[nr_got]);
- if (ret)
- break;
+ enum bch_watermark watermark = new_fs
+ ? BCH_WATERMARK_btree
+ : BCH_WATERMARK_normal;
+ ob[nr_got] = bch2_bucket_alloc(c, ca, watermark,
+ BCH_DATA_journal, cl);
+ ret = PTR_ERR_OR_ZERO(ob[nr_got]);
+ if (ret)
+ break;
+
+ if (!new_fs) {
ret = bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(trans, ca,
ob[nr_got]->bucket, BCH_DATA_journal,
@@ -1024,9 +1022,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
bch_err_msg(c, ret, "marking new journal buckets");
break;
}
-
- bu[nr_got] = ob[nr_got]->bucket;
}
+
+ bu[nr_got] = ob[nr_got]->bucket;
}
if (!nr_got)
@@ -1066,8 +1064,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (ret)
goto err_unblock;
- if (!new_fs)
- bch2_write_super(c);
+ bch2_write_super(c);
/* Commit: */
if (c)
@@ -1101,9 +1098,8 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
bu[i], BCH_DATA_free, 0,
BTREE_TRIGGER_transactional));
err_free:
- if (!new_fs)
- for (i = 0; i < nr_got; i++)
- bch2_open_bucket_put(c, ob[i]);
+ for (i = 0; i < nr_got; i++)
+ bch2_open_bucket_put(c, ob[i]);
kfree(new_bucket_seq);
kfree(new_buckets);
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 3d8fc2642425..1aabbbe328d9 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -38,6 +38,9 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
struct journal_device *ja,
enum journal_space_from from)
{
+ if (!ja->nr)
+ return 0;
+
unsigned available = (journal_space_from(ja, from) -
ja->cur_idx - 1 + ja->nr) % ja->nr;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 7086a7226989..547c78a323f7 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1070,7 +1070,6 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
set_bit(BCH_FS_btree_running, &c->flags);
set_bit(BCH_FS_may_go_rw, &c->flags);
@@ -1111,9 +1110,6 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- for_each_online_member(c, ca)
- ca->new_fs_bucket_idx = 0;
-
ret = bch2_fs_freespace_init(c);
if (ret)
goto err;
@@ -1172,6 +1168,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
+ c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
return 0;
err:
bch_err_fn(c, ret);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 08170a3d524f..14157820705d 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1750,11 +1750,6 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (ret)
goto err;
- ret = bch2_dev_journal_alloc(ca, true);
- bch_err_msg(c, ret, "allocating journal");
- if (ret)
- goto err;
-
down_write(&c->state_lock);
mutex_lock(&c->sb_lock);
@@ -1805,11 +1800,14 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (ret)
goto err_late;
- ca->new_fs_bucket_idx = 0;
-
if (ca->mi.state == BCH_MEMBER_STATE_rw)
__bch2_dev_read_write(c, ca);
+ ret = bch2_dev_journal_alloc(ca, false);
+ bch_err_msg(c, ret, "allocating journal");
+ if (ret)
+ goto err_late;
+
up_write(&c->state_lock);
return 0;
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 09/34] bcachefs: Bad btree roots are now autofix
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (7 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 08/34] bcachefs: Kill bch2_bucket_alloc_new_fs() Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 10/34] bcachefs: Fix dup/misordered check in btree node read Kent Overstreet
` (24 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/sb-errors_format.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index d45d0789f1b1..89d9dc2c859b 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -68,8 +68,8 @@ enum bch_fsck_flags {
x(btree_node_bkey_bad_format, 55, 0) \
x(btree_node_bad_bkey, 56, 0) \
x(btree_node_bkey_out_of_order, 57, 0) \
- x(btree_root_bkey_invalid, 58, 0) \
- x(btree_root_read_error, 59, 0) \
+ x(btree_root_bkey_invalid, 58, FSCK_AUTOFIX) \
+ x(btree_root_read_error, 59, FSCK_AUTOFIX) \
x(btree_root_bad_min_key, 60, 0) \
x(btree_root_bad_max_key, 61, 0) \
x(btree_node_read_error, 62, 0) \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 10/34] bcachefs: Fix dup/misordered check in btree node read
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (8 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 09/34] bcachefs: Bad btree roots are now autofix Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 11/34] bcachefs: Don't try to en/decrypt when encryption not available Kent Overstreet
` (23 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+dedbd67513939979f84f
We were checking for out of order keys, but not duplicate keys.
Reported-by: syzbot+dedbd67513939979f84f@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_io.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 89a42ee81e5c..2b5da566fbac 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -857,6 +857,14 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent);
}
+static inline int btree_node_read_bkey_cmp(const struct btree *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
+{
+ return bch2_bkey_cmp_packed(b, l, r)
+ ?: (int) bkey_deleted(r) - (int) bkey_deleted(l);
+}
+
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
struct bset *i, int write,
bool have_retry, bool *saw_error)
@@ -917,7 +925,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
BSET_BIG_ENDIAN(i), write,
&b->format, k);
- if (prev && bkey_iter_cmp(b, prev, k) > 0) {
+ if (prev && btree_node_read_bkey_cmp(b, prev, k) >= 0) {
struct bkey up = bkey_unpack_key(b, prev);
printbuf_reset(&buf);
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 11/34] bcachefs: Don't try to en/decrypt when encryption not available
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (9 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 10/34] bcachefs: Fix dup/misordered check in btree node read Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 12/34] bcachefs: Change "disk accounting version 0" check to commit only Kent Overstreet
` (22 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+026f1857b12f5eb3f9e9
If a btree node says it's encrypted, but the superblock never had an
encryption key - whoops, that needs to be handled.
Reported-by: syzbot+026f1857b12f5eb3f9e9@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_io.c | 116 +++++++++++++++++-----------------
fs/bcachefs/btree_node_scan.c | 3 +
fs/bcachefs/checksum.c | 10 ++-
fs/bcachefs/errcode.h | 1 +
fs/bcachefs/io_read.c | 14 +++-
5 files changed, 83 insertions(+), 61 deletions(-)
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 2b5da566fbac..5407f1212b4f 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1045,39 +1045,50 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
while (b->written < (ptr_written ?: btree_sectors(c))) {
unsigned sectors;
- struct nonce nonce;
bool first = !b->written;
- bool csum_bad;
- if (!b->written) {
+ if (first) {
i = &b->data->keys;
+ } else {
+ bne = write_block(b);
+ i = &bne->keys;
- btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
- -BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i, NULL,
- bset_unknown_csum,
- "unknown checksum type %llu", BSET_CSUM_TYPE(i));
-
- nonce = btree_nonce(i, b->written << 9);
+ if (i->seq != b->data->keys.seq)
+ break;
+ }
- struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
- csum_bad = bch2_crc_cmp(b->data->csum, csum);
- if (csum_bad)
- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+ struct nonce nonce = btree_nonce(i, b->written << 9);
+ bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i));
- btree_err_on(csum_bad,
- -BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i, NULL,
- bset_bad_csum,
- "%s",
- (printbuf_reset(&buf),
- bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
- buf.buf));
-
- ret = bset_encrypt(c, i, b->written << 9);
- if (bch2_fs_fatal_err_on(ret, c,
- "decrypting btree node: %s", bch2_err_str(ret)))
- goto fsck_err;
+ btree_err_on(!good_csum_type,
+ bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))
+ ? -BCH_ERR_btree_node_read_err_must_retry
+ : -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, i, NULL,
+ bset_unknown_csum,
+ "unknown checksum type %llu", BSET_CSUM_TYPE(i));
+
+ if (first) {
+ if (good_csum_type) {
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
+ bool csum_bad = bch2_crc_cmp(b->data->csum, csum);
+ if (csum_bad)
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+
+ btree_err_on(csum_bad,
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, i, NULL,
+ bset_bad_csum,
+ "%s",
+ (printbuf_reset(&buf),
+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum),
+ buf.buf));
+
+ ret = bset_encrypt(c, i, b->written << 9);
+ if (bch2_fs_fatal_err_on(ret, c,
+ "decrypting btree node: %s", bch2_err_str(ret)))
+ goto fsck_err;
+ }
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
@@ -1088,37 +1099,26 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sectors = vstruct_sectors(b->data, c->block_bits);
} else {
- bne = write_block(b);
- i = &bne->keys;
-
- if (i->seq != b->data->keys.seq)
- break;
-
- btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
- -BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i, NULL,
- bset_unknown_csum,
- "unknown checksum type %llu", BSET_CSUM_TYPE(i));
-
- nonce = btree_nonce(i, b->written << 9);
- struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
- csum_bad = bch2_crc_cmp(bne->csum, csum);
- if (ca && csum_bad)
- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
-
- btree_err_on(csum_bad,
- -BCH_ERR_btree_node_read_err_want_retry,
- c, ca, b, i, NULL,
- bset_bad_csum,
- "%s",
- (printbuf_reset(&buf),
- bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
- buf.buf));
-
- ret = bset_encrypt(c, i, b->written << 9);
- if (bch2_fs_fatal_err_on(ret, c,
- "decrypting btree node: %s", bch2_err_str(ret)))
- goto fsck_err;
+ if (good_csum_type) {
+ struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+ bool csum_bad = bch2_crc_cmp(bne->csum, csum);
+ if (ca && csum_bad)
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+
+ btree_err_on(csum_bad,
+ -BCH_ERR_btree_node_read_err_want_retry,
+ c, ca, b, i, NULL,
+ bset_bad_csum,
+ "%s",
+ (printbuf_reset(&buf),
+ bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum),
+ buf.buf));
+
+ ret = bset_encrypt(c, i, b->written << 9);
+ if (bch2_fs_fatal_err_on(ret, c,
+ "decrypting btree node: %s", bch2_err_str(ret)))
+ goto fsck_err;
+ }
sectors = vstruct_sectors(bne, c->block_bits);
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 4b4df31d4b95..327f1a1859b9 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -159,6 +159,9 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
return;
if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
+ if (!c->chacha20)
+ return;
+
struct nonce nonce = btree_nonce(&bn->keys, 0);
unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index ce8fc677bef9..23a383577d4c 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "checksum.h"
#include "errcode.h"
+#include "error.h"
#include "super.h"
#include "super-io.h"
@@ -252,6 +253,10 @@ int bch2_encrypt(struct bch_fs *c, unsigned type,
if (!bch2_csum_type_is_encryption(type))
return 0;
+ if (bch2_fs_inconsistent_on(!c->chacha20,
+ c, "attempting to encrypt without encryption key"))
+ return -BCH_ERR_no_encryption_key;
+
return do_encrypt(c->chacha20, nonce, data, len);
}
@@ -337,8 +342,9 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type,
size_t sgl_len = 0;
int ret = 0;
- if (!bch2_csum_type_is_encryption(type))
- return 0;
+ if (bch2_fs_inconsistent_on(!c->chacha20,
+ c, "attempting to encrypt without encryption key"))
+ return -BCH_ERR_no_encryption_key;
darray_init(&sgl);
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index c989ce4f715f..a12050e9c191 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -260,6 +260,7 @@
x(EIO, no_device_to_read_from) \
x(EIO, missing_indirect_extent) \
x(EIO, invalidate_stripe_to_dev) \
+ x(EIO, no_encryption_key) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \
x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index eb8d12fd6398..4b6b6d25725b 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -830,7 +830,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
if (!pick_ret)
goto hole;
- if (pick_ret < 0) {
+ if (unlikely(pick_ret < 0)) {
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, k);
@@ -843,6 +843,18 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
goto err;
}
+ if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ bch_err_inum_offset_ratelimited(c,
+ read_pos.inode, read_pos.offset << 9,
+ "attempting to read encrypted data without encryption key\n %s",
+ buf.buf);
+ printbuf_exit(&buf);
+ goto err;
+ }
+
struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
/*
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 12/34] bcachefs: Change "disk accounting version 0" check to commit only
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (10 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 11/34] bcachefs: Don't try to en/decrypt when encryption not available Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 13/34] bcachefs: Fix bch2_btree_node_update_key_early() Kent Overstreet
` (21 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
6.11 had a bug where we'd sometimes create disk accounting keys with
version 0, which causes issues for journal replay - but we don't need to
delete existing accounting keys with version 0.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/disk_accounting.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index bb5dbbf71d04..c5e61265b709 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -134,7 +134,8 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
void *end = &acc_k + 1;
int ret = 0;
- bkey_fsck_err_on(bversion_zero(k.k->bversion),
+ bkey_fsck_err_on((flags & BCH_VALIDATE_commit) &&
+ bversion_zero(k.k->bversion),
c, accounting_key_version_0,
"accounting key with version=0");
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 13/34] bcachefs: Fix bch2_btree_node_update_key_early()
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (11 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 12/34] bcachefs: Change "disk accounting version 0" check to commit only Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 14/34] bcachefs: Go RW earlier, for normal rw mount Kent Overstreet
` (20 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, Tyler
Fix an assertion pop from the recent btree cache freelist fixes.
Fixes: baefd3f849ed ("bcachefs: btree_cache.freeable list fixes")
Reported-by: Tyler <th020394@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_cache.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 1f06e24e53fc..1117be901cf0 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -326,7 +326,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
if (!IS_ERR_OR_NULL(b)) {
mutex_lock(&c->btree_cache.lock);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
+ __bch2_btree_node_hash_remove(&c->btree_cache, b);
bkey_copy(&b->key, new);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 14/34] bcachefs: Go RW earlier, for normal rw mount
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (12 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 13/34] bcachefs: Fix bch2_btree_node_update_key_early() Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 15/34] bcachefs: Fix null ptr deref in btree_path_lock_root() Kent Overstreet
` (19 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Previously, when mounting read-write after a clean shutdown, we wouldn't
go read-write until after all the recovery passes completed.
Now, go RW early in recovery, the same as in any other situation where we
need to go read-write. This fixes a bug where we discover unlinked inodes
after a clean shutdown: repair fails because we're read only.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/recovery_passes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index 1240c5c19fea..f6d3a99cb63e 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -46,7 +46,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
set_bit(BCH_FS_may_go_rw, &c->flags);
- if (keys->nr || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
+ if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
return bch2_fs_read_write_early(c);
return 0;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 15/34] bcachefs: Fix null ptr deref in btree_path_lock_root()
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (13 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 14/34] bcachefs: Go RW earlier, for normal rw mount Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 16/34] bcachefs: Ignore empty btree root journal entries Kent Overstreet
` (18 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+e22007d6acb9c87c2362
Historically, we required that all btree node roots point to a valid
(possibly fake) node, but we're improving our ability to continue in the
presence of errors.
Reported-by: syzbot+e22007d6acb9c87c2362@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_iter.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 89f9665ce70d..80c3b55ce763 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -722,7 +722,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
unsigned long trace_ip)
{
struct bch_fs *c = trans->c;
- struct btree *b, **rootp = &bch2_btree_id_root(c, path->btree_id)->b;
+ struct btree_root *r = bch2_btree_id_root(c, path->btree_id);
enum six_lock_type lock_type;
unsigned i;
int ret;
@@ -730,7 +730,12 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
EBUG_ON(path->nodes_locked);
while (1) {
- b = READ_ONCE(*rootp);
+ struct btree *b = READ_ONCE(r->b);
+ if (unlikely(!b)) {
+ BUG_ON(!r->error);
+ return r->error;
+ }
+
path->level = READ_ONCE(b->c.level);
if (unlikely(path->level < depth_want)) {
@@ -755,7 +760,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
BUG();
}
- if (likely(b == READ_ONCE(*rootp) &&
+ if (likely(b == READ_ONCE(r->b) &&
b->c.level == path->level &&
!race_fault())) {
for (i = 0; i < path->level; i++)
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 16/34] bcachefs: Ignore empty btree root journal entries
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (14 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 15/34] bcachefs: Fix null ptr deref in btree_path_lock_root() Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 17/34] bcachefs: struct bkey_validate_context Kent Overstreet
` (17 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+e22007d6acb9c87c2362
There's no reason to treat them as errors: just ignore them, and go with
a previous btree root if we had one.
Reported-by: syzbot+e22007d6acb9c87c2362@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/recovery.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 547c78a323f7..727e894762f5 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -442,7 +442,9 @@ static int journal_replay_entry_early(struct bch_fs *c,
switch (entry->type) {
case BCH_JSET_ENTRY_btree_root: {
- struct btree_root *r;
+
+ if (unlikely(!entry->u64s))
+ return 0;
if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX,
c, invalid_btree_id,
@@ -456,15 +458,11 @@ static int journal_replay_entry_early(struct bch_fs *c,
return ret;
}
- r = bch2_btree_id_root(c, entry->btree_id);
+ struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);
- if (entry->u64s) {
- r->level = entry->level;
- bkey_copy(&r->key, (struct bkey_i *) entry->start);
- r->error = 0;
- } else {
- r->error = -BCH_ERR_btree_node_read_error;
- }
+ r->level = entry->level;
+ bkey_copy(&r->key, (struct bkey_i *) entry->start);
+ r->error = 0;
r->alive = true;
break;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 17/34] bcachefs: struct bkey_validate_context
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (15 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 16/34] bcachefs: Ignore empty btree root journal entries Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 18/34] bcachefs: Make topology errors autofix Kent Overstreet
` (16 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Add a new parameter to bkey validate functions, and use it to improve
invalid bkey error messages: we can now print the btree and depth it
came from, or if it came from the journal, or is a btree root.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_background.c | 10 +++----
fs/bcachefs/alloc_background.h | 16 ++++++-----
fs/bcachefs/backpointers.c | 2 +-
fs/bcachefs/backpointers.h | 3 +-
fs/bcachefs/bkey.h | 7 -----
fs/bcachefs/bkey_methods.c | 29 ++++++++++---------
fs/bcachefs/bkey_methods.h | 15 +++++-----
fs/bcachefs/bkey_types.h | 26 +++++++++++++++++
fs/bcachefs/btree_io.c | 44 ++++++++++++++++++++++++-----
fs/bcachefs/btree_node_scan.c | 7 ++++-
fs/bcachefs/btree_trans_commit.c | 7 ++++-
fs/bcachefs/btree_update_interior.c | 11 ++++++--
fs/bcachefs/data_update.c | 7 +++--
fs/bcachefs/dirent.c | 4 +--
fs/bcachefs/dirent.h | 4 +--
fs/bcachefs/disk_accounting.c | 4 +--
fs/bcachefs/disk_accounting.h | 3 +-
fs/bcachefs/ec.c | 4 +--
fs/bcachefs/ec.h | 5 ++--
fs/bcachefs/error.c | 20 ++++++++++---
fs/bcachefs/error.h | 4 +--
fs/bcachefs/extents.c | 20 ++++++-------
fs/bcachefs/extents.h | 9 +++---
fs/bcachefs/inode.c | 16 +++++------
fs/bcachefs/inode.h | 9 +++---
fs/bcachefs/journal_io.c | 35 ++++++++++++++---------
fs/bcachefs/lru.c | 2 +-
fs/bcachefs/lru.h | 2 +-
fs/bcachefs/quota.c | 2 +-
fs/bcachefs/quota.h | 4 +--
fs/bcachefs/recovery.c | 1 +
fs/bcachefs/reflink.c | 8 +++---
fs/bcachefs/reflink.h | 10 +++----
fs/bcachefs/snapshot.c | 4 +--
fs/bcachefs/snapshot.h | 7 ++---
fs/bcachefs/subvolume.c | 2 +-
fs/bcachefs/subvolume.h | 5 ++--
fs/bcachefs/xattr.c | 2 +-
fs/bcachefs/xattr.h | 3 +-
39 files changed, 231 insertions(+), 142 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 1e9f53db4bb8..8846daaa1162 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -198,7 +198,7 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
}
int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
int ret = 0;
@@ -213,7 +213,7 @@ int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k,
}
int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_alloc_unpacked u;
int ret = 0;
@@ -226,7 +226,7 @@ int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k,
}
int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_alloc_unpacked u;
int ret = 0;
@@ -239,7 +239,7 @@ int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
}
int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bch_alloc_v4 a;
int ret = 0;
@@ -509,7 +509,7 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
}
int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 57723a37abb8..8cacddd188f4 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -8,8 +8,6 @@
#include "debug.h"
#include "super.h"
-enum bch_validate_flags;
-
/* How out of date a pointer gen is allowed to be: */
#define BUCKET_GC_GEN_MAX 96U
@@ -245,10 +243,14 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
-int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
-int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
-int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
-int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
+int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
+int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
+int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_alloc_v4_swab(struct bkey_s);
void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
@@ -282,7 +284,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
})
int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index cfd9b9ead473..ff08afd667a0 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -15,7 +15,7 @@
#include <linux/mm.h>
int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
int ret = 0;
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index d8a15f5fa767..95caeabb8978 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -18,7 +18,8 @@ static inline u64 swab40(u64 x)
((x & 0xff00000000ULL) >> 32));
}
-int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags);
+int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k,
+ struct bkey_validate_context);
void bch2_backpointer_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_backpointer_swab(struct bkey_s);
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index 41df24a53d97..054e2d5e8448 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -9,13 +9,6 @@
#include "util.h"
#include "vstructs.h"
-enum bch_validate_flags {
- BCH_VALIDATE_write = BIT(0),
- BCH_VALIDATE_commit = BIT(1),
- BCH_VALIDATE_journal = BIT(2),
- BCH_VALIDATE_silent = BIT(3),
-};
-
#if 0
/*
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index e7ac227ba7e8..15c93576b5c2 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -28,7 +28,7 @@ const char * const bch2_bkey_types[] = {
};
static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
return 0;
}
@@ -42,7 +42,7 @@ static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k,
})
static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
@@ -59,7 +59,7 @@ static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k,
})
static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
return 0;
}
@@ -83,7 +83,7 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c,
})
static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
return 0;
}
@@ -124,7 +124,7 @@ const struct bkey_ops bch2_bkey_null_ops = {
};
int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
if (test_bit(BCH_FS_no_invalid_checks, &c->flags))
return 0;
@@ -140,7 +140,7 @@ int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k,
if (!ops->key_validate)
return 0;
- ret = ops->key_validate(c, k, flags);
+ ret = ops->key_validate(c, k, from);
fsck_err:
return ret;
}
@@ -161,9 +161,10 @@ const char *bch2_btree_node_type_str(enum btree_node_type type)
}
int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
- enum btree_node_type type,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
+ enum btree_node_type type = __btree_node_type(from.level, from.btree);
+
if (test_bit(BCH_FS_no_invalid_checks, &c->flags))
return 0;
@@ -177,7 +178,7 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
return 0;
bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX &&
- (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) &&
+ (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)),
c, bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
@@ -228,15 +229,15 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
}
int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k,
- enum btree_node_type type,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
- return __bch2_bkey_validate(c, k, type, flags) ?:
- bch2_bkey_val_validate(c, k, flags);
+ return __bch2_bkey_validate(c, k, from) ?:
+ bch2_bkey_val_validate(c, k, from);
}
int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b,
- struct bkey_s_c k, enum bch_validate_flags flags)
+ struct bkey_s_c k,
+ struct bkey_validate_context from)
{
int ret = 0;
diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index 018fb72e32d3..bf34111cdf00 100644
--- a/fs/bcachefs/bkey_methods.h
+++ b/fs/bcachefs/bkey_methods.h
@@ -22,7 +22,7 @@ extern const struct bkey_ops bch2_bkey_null_ops;
*/
struct bkey_ops {
int (*key_validate)(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags);
+ struct bkey_validate_context from);
void (*val_to_text)(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void (*swab)(struct bkey_s);
@@ -48,13 +48,14 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type)
: &bch2_bkey_null_ops;
}
-int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
-int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
- enum bch_validate_flags);
-int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type,
- enum bch_validate_flags);
+int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
+int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
+int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context from);
void bch2_bpos_to_text(struct printbuf *, struct bpos);
void bch2_bkey_to_text(struct printbuf *, const struct bkey *);
diff --git a/fs/bcachefs/bkey_types.h b/fs/bcachefs/bkey_types.h
index c9ae9e42b385..2af6279b02a9 100644
--- a/fs/bcachefs/bkey_types.h
+++ b/fs/bcachefs/bkey_types.h
@@ -210,4 +210,30 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
BCH_BKEY_TYPES();
#undef x
+enum bch_validate_flags {
+ BCH_VALIDATE_write = BIT(0),
+ BCH_VALIDATE_commit = BIT(1),
+ BCH_VALIDATE_journal = BIT(2),
+ BCH_VALIDATE_silent = BIT(3),
+};
+
+#define BKEY_VALIDATE_CONTEXTS() \
+ x(unknown) \
+ x(commit) \
+ x(journal) \
+ x(btree_root) \
+ x(btree_node)
+
+struct bkey_validate_context {
+ enum {
+#define x(n) BKEY_VALIDATE_##n,
+ BKEY_VALIDATE_CONTEXTS()
+#undef x
+ } from:8;
+ u8 level;
+ enum btree_id btree;
+ bool root:1;
+ enum bch_validate_flags flags:8;
+};
+
#endif /* _BCACHEFS_BKEY_TYPES_H */
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 5407f1212b4f..228a8bd7a673 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -831,13 +831,32 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
return ret;
}
+static int btree_node_bkey_val_validate(struct bch_fs *c, struct btree *b,
+ struct bkey_s_c k,
+ enum bch_validate_flags flags)
+{
+ return bch2_bkey_val_validate(c, k, (struct bkey_validate_context) {
+ .from = BKEY_VALIDATE_btree_node,
+ .level = b->c.level,
+ .btree = b->c.btree_id,
+ .flags = flags
+ });
+}
+
static int bset_key_validate(struct bch_fs *c, struct btree *b,
struct bkey_s_c k,
- bool updated_range, int rw)
+ bool updated_range,
+ enum bch_validate_flags flags)
{
- return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?:
- (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?:
- (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0);
+ struct bkey_validate_context from = (struct bkey_validate_context) {
+ .from = BKEY_VALIDATE_btree_node,
+ .level = b->c.level,
+ .btree = b->c.btree_id,
+ .flags = flags,
+ };
+ return __bch2_bkey_validate(c, k, from) ?:
+ (!updated_range ? bch2_bkey_in_btree_node(c, b, k, from) : 0) ?:
+ (flags & BCH_VALIDATE_write ? btree_node_bkey_val_validate(c, b, k, flags) : 0);
}
static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
@@ -854,7 +873,13 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
struct bkey tmp;
struct bkey_s u = __bkey_disassemble(b, k, &tmp);
- return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent);
+ return !__bch2_bkey_validate(c, u.s_c,
+ (struct bkey_validate_context) {
+ .from = BKEY_VALIDATE_btree_node,
+ .level = b->c.level,
+ .btree = b->c.btree_id,
+ .flags = BCH_VALIDATE_silent
+ });
}
static inline int btree_node_read_bkey_cmp(const struct btree *b,
@@ -1223,7 +1248,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
struct bkey tmp;
struct bkey_s u = __bkey_disassemble(b, k, &tmp);
- ret = bch2_bkey_val_validate(c, u.s_c, READ);
+ ret = btree_node_bkey_val_validate(c, b, u.s_c, READ);
if (ret == -BCH_ERR_fsck_delete_bkey ||
(bch2_inject_invalid_keys &&
!bversion_cmp(u.k->bversion, MAX_VERSION))) {
@@ -1942,7 +1967,12 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
bool saw_error;
int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key),
- BKEY_TYPE_btree, WRITE);
+ (struct bkey_validate_context) {
+ .from = BKEY_VALIDATE_btree_node,
+ .level = b->c.level + 1,
+ .btree = b->c.btree_id,
+ .flags = BCH_VALIDATE_write,
+ });
if (ret) {
bch2_fs_inconsistent(c, "invalid btree node key before write");
return ret;
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 327f1a1859b9..eeafb5e7354e 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -538,7 +538,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
printbuf_exit(&buf);
- BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0));
+ BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k),
+ (struct bkey_validate_context) {
+ .from = BKEY_VALIDATE_btree_node,
+ .level = level + 1,
+ .btree = btree,
+ }));
ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
if (ret)
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index cf313477567a..78d72c26083d 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -726,7 +726,12 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags,
invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit;
ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k),
- i->bkey_type, invalid_flags);
+ (struct bkey_validate_context) {
+ .from = BKEY_VALIDATE_commit,
+ .level = i->level,
+ .btree = i->btree_id,
+ .flags = invalid_flags,
+ });
if (unlikely(ret)){
bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n",
trans->fn, (void *) i->ip_allocated);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index faa2816e02a0..56a70e95ef9a 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1360,9 +1360,14 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags)))
bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
- if (bch2_bkey_validate(c, bkey_i_to_s_c(insert),
- btree_node_type(b), BCH_VALIDATE_write) ?:
- bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) {
+ struct bkey_validate_context from = (struct bkey_validate_context) {
+ .from = BKEY_VALIDATE_btree_node,
+ .level = b->c.level,
+ .btree = b->c.btree_id,
+ .flags = BCH_VALIDATE_commit,
+ };
+ if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), from) ?:
+ bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), from)) {
bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__);
dump_stack();
}
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index e4af2ccdf4c8..31b2aeb0c6e6 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -318,8 +318,11 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
* it's been hard to reproduce, so this should give us some more
* information when it does occur:
*/
- int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id),
- BCH_VALIDATE_commit);
+ int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert),
+ (struct bkey_validate_context) {
+ .btree = m->btree_id,
+ .flags = BCH_VALIDATE_commit,
+ });
if (invalid) {
struct printbuf buf = PRINTBUF;
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 4c22f78b0484..41813f9ce831 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -101,7 +101,7 @@ const struct bch_hash_desc bch2_dirent_hash_desc = {
};
int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
struct qstr d_name = bch2_dirent_get_name(d);
@@ -120,7 +120,7 @@ int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k,
* Check new keys don't exceed the max length
* (older keys may be larger.)
*/
- bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX,
+ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX,
c, dirent_name_too_long,
"dirent name too big (%u > %u)",
d_name.len, BCH_NAME_MAX);
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index 53ad99666022..362b3b2f2f2e 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -4,10 +4,10 @@
#include "str_hash.h"
-enum bch_validate_flags;
extern const struct bch_hash_desc bch2_dirent_hash_desc;
-int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_dirent ((struct bkey_ops) { \
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index c5e61265b709..71c49a7ee2fe 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -127,14 +127,14 @@ static inline bool is_zero(char *start, char *end)
#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member))
int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
void *end = &acc_k + 1;
int ret = 0;
- bkey_fsck_err_on((flags & BCH_VALIDATE_commit) &&
+ bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) &&
bversion_zero(k.k->bversion),
c, accounting_key_version_0,
"accounting key with version=0");
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index 8b2b2f83e6a4..566aa2a8539d 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -83,7 +83,8 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *,
s64 *, unsigned, bool);
int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool);
-int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *);
void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_accounting_swab(struct bkey_s);
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index f6b7b8b54f62..7d6c33f04092 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -110,7 +110,7 @@ struct ec_bio {
/* Stripes btree keys: */
int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
int ret = 0;
@@ -130,7 +130,7 @@ int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k,
"invalid csum granularity (%u >= 64)",
s->csum_granularity_bits);
- ret = bch2_bkey_ptrs_validate(c, k, flags);
+ ret = bch2_bkey_ptrs_validate(c, k, from);
fsck_err:
return ret;
}
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 43326370b410..583ca6a226da 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -6,9 +6,8 @@
#include "buckets_types.h"
#include "extents_types.h"
-enum bch_validate_flags;
-
-int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned,
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 2960baa023f6..9a695322b33c 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "btree_cache.h"
#include "btree_iter.h"
#include "error.h"
#include "journal.h"
@@ -443,23 +444,34 @@ int __bch2_fsck_err(struct bch_fs *c,
return ret;
}
+static const char * const bch2_bkey_validate_contexts[] = {
+#define x(n) #n,
+ BKEY_VALIDATE_CONTEXTS()
+#undef x
+ NULL
+};
+
int __bch2_bkey_fsck_err(struct bch_fs *c,
struct bkey_s_c k,
- enum bch_validate_flags validate_flags,
+ struct bkey_validate_context from,
enum bch_sb_error_id err,
const char *fmt, ...)
{
- if (validate_flags & BCH_VALIDATE_silent)
+ if (from.flags & BCH_VALIDATE_silent)
return -BCH_ERR_fsck_delete_bkey;
unsigned fsck_flags = 0;
- if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)))
+ if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)))
fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX;
struct printbuf buf = PRINTBUF;
va_list args;
- prt_str(&buf, "invalid bkey ");
+ prt_printf(&buf, "invalid bkey in %s btree=",
+ bch2_bkey_validate_contexts[from.from]);
+ bch2_btree_id_to_text(&buf, from.btree);
+ prt_printf(&buf, " level=%u: ", from.level);
+
bch2_bkey_val_to_text(&buf, c, k);
prt_str(&buf, "\n ");
va_start(args, fmt);
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 8327a3461535..3b278f28e56b 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -153,7 +153,7 @@ enum bch_validate_flags;
__printf(5, 6)
int __bch2_bkey_fsck_err(struct bch_fs *,
struct bkey_s_c,
- enum bch_validate_flags,
+ struct bkey_validate_context from,
enum bch_sb_error_id,
const char *, ...);
@@ -163,7 +163,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *,
*/
#define bkey_fsck_err(c, _err_type, _err_msg, ...) \
do { \
- int _ret = __bch2_bkey_fsck_err(c, k, flags, \
+ int _ret = __bch2_bkey_fsck_err(c, k, from, \
BCH_FSCK_ERR_##_err_type, \
_err_msg, ##__VA_ARGS__); \
if (_ret != -BCH_ERR_fsck_fix && \
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 83aeceb68847..aa3b88291814 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -178,7 +178,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
/* KEY_TYPE_btree_ptr: */
int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
@@ -186,7 +186,7 @@ int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k,
c, btree_ptr_val_too_big,
"value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
- ret = bch2_bkey_ptrs_validate(c, k, flags);
+ ret = bch2_bkey_ptrs_validate(c, k, from);
fsck_err:
return ret;
}
@@ -198,7 +198,7 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
}
int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
int ret = 0;
@@ -212,13 +212,13 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k,
c, btree_ptr_v2_min_key_bad,
"min_key > key");
- if ((flags & BCH_VALIDATE_write) &&
+ if ((from.flags & BCH_VALIDATE_write) &&
c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written)
bkey_fsck_err_on(!bp.v->sectors_written,
c, btree_ptr_v2_written_0,
"sectors_written == 0");
- ret = bch2_bkey_ptrs_validate(c, k, flags);
+ ret = bch2_bkey_ptrs_validate(c, k, from);
fsck_err:
return ret;
}
@@ -405,7 +405,7 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
/* KEY_TYPE_reservation: */
int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
int ret = 0;
@@ -1231,7 +1231,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
static int extent_ptr_validate(struct bch_fs *c,
struct bkey_s_c k,
- enum bch_validate_flags flags,
+ struct bkey_validate_context from,
const struct bch_extent_ptr *ptr,
unsigned size_ondisk,
bool metadata)
@@ -1274,7 +1274,7 @@ static int extent_ptr_validate(struct bch_fs *c,
}
int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -1301,7 +1301,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
switch (extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
- ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false);
+ ret = extent_ptr_validate(c, k, from, &entry->ptr, size_ondisk, false);
if (ret)
return ret;
@@ -1348,7 +1348,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
bkey_fsck_err_on(crc_is_encoded(crc) &&
(crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
- (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)),
+ (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)),
c, ptr_crc_uncompressed_size_too_big,
"too large encoded extent");
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index ba33788fee36..620b284aa34f 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -8,7 +8,6 @@
struct bch_fs;
struct btree_trans;
-enum bch_validate_flags;
/* extent entries: */
@@ -410,12 +409,12 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
/* KEY_TYPE_btree_ptr: */
int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
int, struct bkey_s);
@@ -452,7 +451,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
/* KEY_TYPE_reservation: */
int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
@@ -696,7 +695,7 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1,
struct bch_extent_ptr ptr2)
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 5c603ab66be0..8818e41883f2 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -429,7 +429,7 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
}
static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bch_inode_unpacked unpacked;
int ret = 0;
@@ -469,7 +469,7 @@ static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k,
}
int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
int ret = 0;
@@ -479,13 +479,13 @@ int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k,
"invalid str hash type (%llu >= %u)",
INODEv1_STR_HASH(inode.v), BCH_STR_HASH_NR);
- ret = __bch2_inode_validate(c, k, flags);
+ ret = __bch2_inode_validate(c, k, from);
fsck_err:
return ret;
}
int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
int ret = 0;
@@ -495,13 +495,13 @@ int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k,
"invalid str hash type (%llu >= %u)",
INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR);
- ret = __bch2_inode_validate(c, k, flags);
+ ret = __bch2_inode_validate(c, k, from);
fsck_err:
return ret;
}
int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k);
int ret = 0;
@@ -519,7 +519,7 @@ int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k,
"invalid str hash type (%llu >= %u)",
INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR);
- ret = __bch2_inode_validate(c, k, flags);
+ ret = __bch2_inode_validate(c, k, from);
fsck_err:
return ret;
}
@@ -780,7 +780,7 @@ int bch2_trigger_inode(struct btree_trans *trans,
}
int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index f52336cb298f..927c875976da 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -7,15 +7,14 @@
#include "opts.h"
#include "snapshot.h"
-enum bch_validate_flags;
extern const char * const bch2_inode_opts[];
int bch2_inode_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int __bch2_inode_has_child_snapshots(struct btree_trans *, struct bpos);
@@ -60,7 +59,7 @@ static inline bool bkey_is_inode(const struct bkey *k)
}
int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 768a3b950997..1627f3e16517 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -327,11 +327,11 @@ static void journal_entry_err_msg(struct printbuf *out,
static int journal_validate_key(struct bch_fs *c,
struct jset *jset,
struct jset_entry *entry,
- unsigned level, enum btree_id btree_id,
struct bkey_i *k,
- unsigned version, int big_endian,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from,
+ unsigned version, int big_endian)
{
+ enum bch_validate_flags flags = from.flags;
int write = flags & BCH_VALIDATE_write;
void *next = vstruct_next(entry);
int ret = 0;
@@ -366,11 +366,10 @@ static int journal_validate_key(struct bch_fs *c,
}
if (!write)
- bch2_bkey_compat(level, btree_id, version, big_endian,
+ bch2_bkey_compat(from.level, from.btree, version, big_endian,
write, NULL, bkey_to_packed(k));
- ret = bch2_bkey_validate(c, bkey_i_to_s_c(k),
- __btree_node_type(level, btree_id), write);
+ ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), from);
if (ret == -BCH_ERR_fsck_delete_bkey) {
le16_add_cpu(&entry->u64s, -((u16) k->k.u64s));
memmove(k, bkey_next(k), next - (void *) bkey_next(k));
@@ -381,7 +380,7 @@ static int journal_validate_key(struct bch_fs *c,
goto fsck_err;
if (write)
- bch2_bkey_compat(level, btree_id, version, big_endian,
+ bch2_bkey_compat(from.level, from.btree, version, big_endian,
write, NULL, bkey_to_packed(k));
fsck_err:
return ret;
@@ -394,13 +393,15 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
enum bch_validate_flags flags)
{
struct bkey_i *k = entry->start;
+ struct bkey_validate_context from = {
+ .from = BKEY_VALIDATE_journal,
+ .level = entry->level,
+ .btree = entry->btree_id,
+ .flags = flags|BCH_VALIDATE_journal,
+ };
while (k != vstruct_last(entry)) {
- int ret = journal_validate_key(c, jset, entry,
- entry->level,
- entry->btree_id,
- k, version, big_endian,
- flags|BCH_VALIDATE_journal);
+ int ret = journal_validate_key(c, jset, entry, k, from, version, big_endian);
if (ret == FSCK_DELETED_KEY)
continue;
else if (ret)
@@ -455,8 +456,14 @@ static int journal_entry_btree_root_validate(struct bch_fs *c,
return 0;
}
- ret = journal_validate_key(c, jset, entry, 1, entry->btree_id, k,
- version, big_endian, flags);
+ struct bkey_validate_context from = {
+ .from = BKEY_VALIDATE_journal,
+ .level = entry->level + 1,
+ .btree = entry->btree_id,
+ .root = true,
+ .flags = flags,
+ };
+ ret = journal_validate_key(c, jset, entry, k, from, version, big_endian);
if (ret == FSCK_DELETED_KEY)
ret = 0;
fsck_err:
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index c18242748ca3..ce794d55818f 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -12,7 +12,7 @@
/* KEY_TYPE_lru is obsolete: */
int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index e6a7d8241bb8..f31a6cf1514c 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -33,7 +33,7 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l)
return BCH_LRU_read;
}
-int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context);
void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
void bch2_lru_pos_to_text(struct printbuf *, struct bpos);
diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c
index 74f45a8162ad..8b857fc33244 100644
--- a/fs/bcachefs/quota.c
+++ b/fs/bcachefs/quota.c
@@ -60,7 +60,7 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = {
};
int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h
index a62abcc5332a..1551800ff44c 100644
--- a/fs/bcachefs/quota.h
+++ b/fs/bcachefs/quota.h
@@ -5,10 +5,10 @@
#include "inode.h"
#include "quota_types.h"
-enum bch_validate_flags;
extern const struct bch_sb_field_ops bch_sb_field_ops_quota;
-int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_quota_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_quota ((struct bkey_ops) { \
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 727e894762f5..e361057ffad4 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -569,6 +569,7 @@ static int read_btree_roots(struct bch_fs *c)
r->error = 0;
ret = bch2_btree_lost_data(c, i);
+ BUG_ON(ret);
}
}
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 38db5a011702..e1911b9beb61 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -41,7 +41,7 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k)
/* reflink pointers */
int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
int ret = 0;
@@ -89,7 +89,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
/* indirect extents */
int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
@@ -98,7 +98,7 @@ int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k,
"indirect extent above maximum position 0:%llu",
REFLINK_P_IDX_MAX);
- ret = bch2_bkey_ptrs_validate(c, k, flags);
+ ret = bch2_bkey_ptrs_validate(c, k, from);
fsck_err:
return ret;
}
@@ -128,7 +128,7 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
/* indirect inline data */
int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
return 0;
}
diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h
index b61a4bdd8e82..f119316adc81 100644
--- a/fs/bcachefs/reflink.h
+++ b/fs/bcachefs/reflink.h
@@ -2,9 +2,8 @@
#ifndef _BCACHEFS_REFLINK_H
#define _BCACHEFS_REFLINK_H
-enum bch_validate_flags;
-
-int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned,
@@ -19,7 +18,8 @@ int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned,
.min_val_size = 16, \
})
-int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s,
@@ -34,7 +34,7 @@ int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned,
})
int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
void bch2_indirect_inline_data_to_text(struct printbuf *,
struct bch_fs *, struct bkey_s_c);
int bch2_trigger_indirect_inline_data(struct btree_trans *,
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 6a52090485dc..f368270d6d9b 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -32,7 +32,7 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c,
}
int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
int ret = 0;
@@ -225,7 +225,7 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c,
}
int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_snapshot s;
u32 i, id;
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index 29c94716293e..ae23d45fad66 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -2,11 +2,9 @@
#ifndef _BCACHEFS_SNAPSHOT_H
#define _BCACHEFS_SNAPSHOT_H
-enum bch_validate_flags;
-
void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c,
- enum bch_validate_flags);
+ struct bkey_validate_context);
#define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \
.key_validate = bch2_snapshot_tree_validate, \
@@ -19,7 +17,8 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *);
int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *);
void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s,
enum btree_iter_update_trigger_flags);
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index cb45ef769c54..5e5ae405cb28 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -207,7 +207,7 @@ int bch2_check_subvol_children(struct bch_fs *c)
/* Subvolumes: */
int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k);
int ret = 0;
diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
index 07b23dc08614..d53d292c22d7 100644
--- a/fs/bcachefs/subvolume.h
+++ b/fs/bcachefs/subvolume.h
@@ -5,12 +5,11 @@
#include "darray.h"
#include "subvolume_types.h"
-enum bch_validate_flags;
-
int bch2_check_subvols(struct bch_fs *);
int bch2_check_subvol_children(struct bch_fs *);
-int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s,
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 820c1791545a..aed7c6984173 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -71,7 +71,7 @@ const struct bch_hash_desc bch2_xattr_hash_desc = {
};
int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len,
diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h
index 2c96de051f3e..132fbbd15a66 100644
--- a/fs/bcachefs/xattr.h
+++ b/fs/bcachefs/xattr.h
@@ -6,7 +6,8 @@
extern const struct bch_hash_desc bch2_xattr_hash_desc;
-int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags);
+int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c,
+ struct bkey_validate_context);
void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_xattr ((struct bkey_ops) { \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread
* [PATCH 18/34] bcachefs: Make topology errors autofix
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (16 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 17/34] bcachefs: struct bkey_validate_context Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 19/34] bcachefs: BCH_FS_recovery_running Kent Overstreet
` (15 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
These repair paths are well tested; we can repair them without explicit
user intervention.
This also tweaks bch2_topology_error() so that we run topology repair if
we're in recovery, not just fsck.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_gc.c | 2 +-
fs/bcachefs/recovery.c | 31 +++++++++++++++++++++++++------
fs/bcachefs/sb-errors_format.h | 12 ++++++------
3 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 2e8cfc4d3265..19db4d8aca88 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -348,7 +348,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
- trans, btree_node_unreadable,
+ trans, btree_node_read_error,
"Topology repair: unreadable btree node at\n"
" %s",
buf.buf)) {
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index e361057ffad4..64bb330eac86 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -40,19 +40,42 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
int ret = 0;
mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (!(c->sb.btrees_lost_data & b)) {
struct printbuf buf = PRINTBUF;
bch2_btree_id_to_text(&buf, btree);
bch_err(c, "flagging btree %s lost data", buf.buf);
printbuf_exit(&buf);
- bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b);
+ ext->btrees_lost_data |= cpu_to_le64(b);
}
+ /* Once we have runtime self healing for topology errors we won't need this: */
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
+
+ /* Btree node accounting will be off: */
+ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+ /*
+ * These are much more minor, and don't need to be corrected right away,
+ * but in debug mode we want the next fsck run to be clean:
+ */
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret;
+ ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
+#endif
+
switch (btree) {
case BTREE_ID_alloc:
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
+
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
goto out;
case BTREE_ID_backpointers:
ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
@@ -75,7 +98,6 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
goto out;
default:
ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
- ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
goto out;
}
out:
@@ -748,9 +770,6 @@ int bch2_fs_recovery(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
-
if (c->opts.fsck)
set_bit(BCH_FS_fsck_running, &c->flags);
if (c->sb.clean)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 89d9dc2c859b..917ef6aa4a23 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -72,12 +72,12 @@ enum bch_fsck_flags {
x(btree_root_read_error, 59, FSCK_AUTOFIX) \
x(btree_root_bad_min_key, 60, 0) \
x(btree_root_bad_max_key, 61, 0) \
- x(btree_node_read_error, 62, 0) \
- x(btree_node_topology_bad_min_key, 63, 0) \
- x(btree_node_topology_bad_max_key, 64, 0) \
- x(btree_node_topology_overwritten_by_prev_node, 65, 0) \
- x(btree_node_topology_overwritten_by_next_node, 66, 0) \
- x(btree_node_topology_interior_node_empty, 67, 0) \
+ x(btree_node_read_error, 62, FSCK_AUTOFIX) \
+ x(btree_node_topology_bad_min_key, 63, FSCK_AUTOFIX) \
+ x(btree_node_topology_bad_max_key, 64, FSCK_AUTOFIX) \
+ x(btree_node_topology_overwritten_by_prev_node, 65, FSCK_AUTOFIX) \
+ x(btree_node_topology_overwritten_by_next_node, 66, FSCK_AUTOFIX) \
+ x(btree_node_topology_interior_node_empty, 67, FSCK_AUTOFIX) \
x(fs_usage_hidden_wrong, 68, FSCK_AUTOFIX) \
x(fs_usage_btree_wrong, 69, FSCK_AUTOFIX) \
x(fs_usage_data_wrong, 70, FSCK_AUTOFIX) \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 19/34] bcachefs: BCH_FS_recovery_running
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (17 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 18/34] bcachefs: Make topology errors autofix Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 20/34] bcachefs: dio write: Take ref on mm_struct when using asynchronously Kent Overstreet
` (14 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
If we're autofixing topology errors, we shouldn't shut down if we're
still in recovery.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/bcachefs.h | 1 +
fs/bcachefs/error.c | 2 +-
fs/bcachefs/recovery.c | 2 ++
3 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index a85b3bcc6383..d88129503bc5 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -614,6 +614,7 @@ struct bch_dev {
x(going_ro) \
x(write_disable_complete) \
x(clean_shutdown) \
+ x(recovery_running) \
x(fsck_running) \
x(initial_gc_unfixed) \
x(need_delete_dead_snapshots) \
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 9a695322b33c..5b67361b0cf1 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -34,7 +34,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
int bch2_topology_error(struct bch_fs *c)
{
set_bit(BCH_FS_topology_error, &c->flags);
- if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
+ if (!test_bit(BCH_FS_recovery_running, &c->flags)) {
bch2_inconsistent_error(c);
return -BCH_ERR_btree_need_topology_repair;
} else {
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 64bb330eac86..c50dede64785 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -774,6 +774,7 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_fsck_running, &c->flags);
if (c->sb.clean)
set_bit(BCH_FS_clean_recovery, &c->flags);
+ set_bit(BCH_FS_recovery_running, &c->flags);
ret = bch2_blacklist_table_initialize(c);
if (ret) {
@@ -925,6 +926,7 @@ int bch2_fs_recovery(struct bch_fs *c)
*/
set_bit(BCH_FS_may_go_rw, &c->flags);
clear_bit(BCH_FS_fsck_running, &c->flags);
+ clear_bit(BCH_FS_recovery_running, &c->flags);
/* in case we don't run journal replay, i.e. norecovery mode */
set_bit(BCH_FS_accounting_replay_done, &c->flags);
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 20/34] bcachefs: dio write: Take ref on mm_struct when using asynchronously
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (18 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 19/34] bcachefs: BCH_FS_recovery_running Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 22:38 ` Jens Axboe
2024-12-05 1:55 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 21/34] bcachefs: Guard against journal seq overflow Kent Overstreet
` (13 subsequent siblings)
33 siblings, 2 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, Jann Horn, Jens Axboe
When invoked from aio, mm_struct is guaranteed to outlive the request
since its lifetime is tied to the io_context - but that's not the case
for io_uring, it's possible that a process could be killed and mm_struct
goes away while a request is in flight.
So if we're submitting the rest of the io asynchronously, we may need a
ref on mm_struct.
Per Jens, this is not actually a bug because we're not yet flipping on
FMODE_NOWAIT, meaning io_uring will do the submission from an io_worker
kthread - but this patch is necessary for safely flipping on
FMODE_NOWAIT for more efficient submissions in the future.
Reported-by: Jann Horn <jannh@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/fs-io-direct.c | 42 ++++++++++++++++++++++++++++++++------
1 file changed, 36 insertions(+), 6 deletions(-)
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 2089c36b5866..b0367b9d9e07 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -226,6 +226,7 @@ struct dio_write {
struct mm_struct *mm;
const struct iovec *iov;
unsigned loop:1,
+ have_mm_ref:1,
extending:1,
sync:1,
flush:1;
@@ -390,6 +391,9 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
kfree(dio->iov);
+ if (dio->have_mm_ref)
+ mmdrop(dio->mm);
+
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
@@ -529,9 +533,24 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
if (unlikely(dio->iter.count) &&
!dio->sync &&
- !dio->loop &&
- bch2_dio_write_copy_iov(dio))
- dio->sync = sync = true;
+ !dio->loop) {
+ /*
+ * Rest of write will be submitted asynchronously -
+ * unless copying the iov fails:
+ */
+ if (likely(!bch2_dio_write_copy_iov(dio))) {
+ /*
+ * aio guarantees that mm_struct outlives the
+ * request, but io_uring does not
+ */
+ if (dio->mm) {
+ mmgrab(dio->mm);
+ dio->have_mm_ref = true;
+ }
+ } else {
+ dio->sync = sync = true;
+ }
+ }
dio->loop = true;
closure_call(&dio->op.cl, bch2_write, NULL, NULL);
@@ -559,15 +578,25 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
static noinline __cold void bch2_dio_write_continue(struct dio_write *dio)
{
- struct mm_struct *mm = dio->mm;
+ struct mm_struct *mm = dio->have_mm_ref ? dio->mm: NULL;
bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE);
- if (mm)
+ if (mm) {
+ if (unlikely(!mmget_not_zero(mm))) {
+ /* process exited */
+ dio->op.error = -ESRCH;
+ bch2_dio_write_done(dio);
+ return;
+ }
+
kthread_use_mm(mm);
+ }
bch2_dio_write_loop(dio);
- if (mm)
+ if (mm) {
kthread_unuse_mm(mm);
+ mmput(mm);
+ }
}
static void bch2_dio_write_loop_async(struct bch_write_op *op)
@@ -641,6 +670,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
dio->mm = current->mm;
dio->iov = NULL;
dio->loop = false;
+ dio->have_mm_ref = false;
dio->extending = extending;
dio->sync = is_sync_kiocb(req) || extending;
dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* Re: [PATCH 20/34] bcachefs: dio write: Take ref on mm_struct when using asynchronously
2024-11-29 20:27 ` [PATCH 20/34] bcachefs: dio write: Take ref on mm_struct when using asynchronously Kent Overstreet
@ 2024-11-29 22:38 ` Jens Axboe
2024-12-05 1:55 ` Kent Overstreet
1 sibling, 0 replies; 37+ messages in thread
From: Jens Axboe @ 2024-11-29 22:38 UTC (permalink / raw)
To: Kent Overstreet, linux-bcachefs; +Cc: Jann Horn
Looks good to me:
Reviewed-by: Jens Axboe <axboe@kernel.dk>
--
Jens Axboe
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 20/34] bcachefs: dio write: Take ref on mm_struct when using asynchronously
2024-11-29 20:27 ` [PATCH 20/34] bcachefs: dio write: Take ref on mm_struct when using asynchronously Kent Overstreet
2024-11-29 22:38 ` Jens Axboe
@ 2024-12-05 1:55 ` Kent Overstreet
1 sibling, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-12-05 1:55 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Jann Horn, Jens Axboe
On Fri, Nov 29, 2024 at 03:27:19PM -0500, Kent Overstreet wrote:
> When invoked from aio, mm_struct is guaranteed to outlive the request
> since its lifetime is tied to the io_context - but that's not the case
> for io_uring, it's possible that a process could be killed and mm_struct
> goes away while a request is in flight.
>
> So if we're submitting the rest of the io asynchronously, we may need a
> ref on mm_struct.
>
> Per Jens, this is not actually a bug because we're not yet flipping on
> FMODE_NOWAIT, meaning io_uring will do the submission from an io_worker
> kthread - but this patch is necessary for safely flipping on
> FMODE_NOWAIT for more efficient submissions in the future.
>
> Reported-by: Jann Horn <jannh@google.com>
> Cc: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
turns out this introduces a rare deadlock in exit_aio()
> ---
> fs/bcachefs/fs-io-direct.c | 42 ++++++++++++++++++++++++++++++++------
> 1 file changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
> index 2089c36b5866..b0367b9d9e07 100644
> --- a/fs/bcachefs/fs-io-direct.c
> +++ b/fs/bcachefs/fs-io-direct.c
> @@ -226,6 +226,7 @@ struct dio_write {
> struct mm_struct *mm;
> const struct iovec *iov;
> unsigned loop:1,
> + have_mm_ref:1,
> extending:1,
> sync:1,
> flush:1;
> @@ -390,6 +391,9 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
>
> kfree(dio->iov);
>
> + if (dio->have_mm_ref)
> + mmdrop(dio->mm);
> +
> ret = dio->op.error ?: ((long) dio->written << 9);
> bio_put(&dio->op.wbio.bio);
>
> @@ -529,9 +533,24 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
>
> if (unlikely(dio->iter.count) &&
> !dio->sync &&
> - !dio->loop &&
> - bch2_dio_write_copy_iov(dio))
> - dio->sync = sync = true;
> + !dio->loop) {
> + /*
> + * Rest of write will be submitted asynchronously -
> + * unless copying the iov fails:
> + */
> + if (likely(!bch2_dio_write_copy_iov(dio))) {
> + /*
> + * aio guarantees that mm_struct outlives the
> + * request, but io_uring does not
> + */
> + if (dio->mm) {
> + mmgrab(dio->mm);
> + dio->have_mm_ref = true;
> + }
> + } else {
> + dio->sync = sync = true;
> + }
> + }
>
> dio->loop = true;
> closure_call(&dio->op.cl, bch2_write, NULL, NULL);
> @@ -559,15 +578,25 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
>
> static noinline __cold void bch2_dio_write_continue(struct dio_write *dio)
> {
> - struct mm_struct *mm = dio->mm;
> + struct mm_struct *mm = dio->have_mm_ref ? dio->mm: NULL;
>
> bio_reset(&dio->op.wbio.bio, NULL, REQ_OP_WRITE);
>
> - if (mm)
> + if (mm) {
> + if (unlikely(!mmget_not_zero(mm))) {
> + /* process exited */
> + dio->op.error = -ESRCH;
> + bch2_dio_write_done(dio);
> + return;
> + }
> +
> kthread_use_mm(mm);
> + }
> bch2_dio_write_loop(dio);
> - if (mm)
> + if (mm) {
> kthread_unuse_mm(mm);
> + mmput(mm);
> + }
> }
>
> static void bch2_dio_write_loop_async(struct bch_write_op *op)
> @@ -641,6 +670,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
> dio->mm = current->mm;
> dio->iov = NULL;
> dio->loop = false;
> + dio->have_mm_ref = false;
> dio->extending = extending;
> dio->sync = is_sync_kiocb(req) || extending;
> dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
> --
> 2.45.2
>
^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 21/34] bcachefs: Guard against journal seq overflow
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (19 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 20/34] bcachefs: dio write: Take ref on mm_struct when using asynchronously Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 22/34] bcachefs: Issue a transaction restart after commit in repair Kent Overstreet
` (12 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+73ed43fbe826227bd4e0
Wraparound is impractical to handle since in various places we use 0 as
a sentinel value - but 64 bits (or 56, because the btree write buffer
steals a few bits) is enough for all practical purposes.
Reported-by: syzbot+73ed43fbe826227bd4e0@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/journal.c | 9 +++++++++
fs/bcachefs/journal_types.h | 3 +++
2 files changed, 12 insertions(+)
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 95cccda3b22c..dc66521964b7 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -382,6 +382,10 @@ static int journal_entry_open(struct journal *j)
if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
return JOURNAL_ERR_max_in_flight;
+ if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX,
+ c, "cannot start: journal seq overflow"))
+ return JOURNAL_ERR_insufficient_devices; /* -EROFS */
+
BUG_ON(!j->cur_entry_sectors);
buf->expires =
@@ -1270,6 +1274,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
bool had_entries = false;
u64 last_seq = cur_seq, nr, seq;
+ if (cur_seq >= JOURNAL_SEQ_MAX) {
+ bch_err(c, "cannot start: journal seq overflow");
+ return -EINVAL;
+ }
+
genradix_for_each_reverse(&c->journal_entries, iter, _i) {
i = *_i;
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 425d1abb257e..e9bd716fbb71 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -9,6 +9,9 @@
#include "super_types.h"
#include "fifo.h"
+/* btree write buffer steals 8 bits for its own purposes: */
+#define JOURNAL_SEQ_MAX ((1ULL << 56) - 1)
+
#define JOURNAL_BUF_BITS 2
#define JOURNAL_BUF_NR (1U << JOURNAL_BUF_BITS)
#define JOURNAL_BUF_MASK (JOURNAL_BUF_NR - 1)
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 22/34] bcachefs: Issue a transaction restart after commit in repair
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (20 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 21/34] bcachefs: Guard against journal seq overflow Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 23/34] bcachefs: Guard against backpointers to unknown btrees Kent Overstreet
` (11 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+78d82470c16a49702682
Transaction commits invalidate pointers to btree values, and they also
downgrade intent locks.
This breaks the interior btree update path, which takes intent locks and
then calls into the allocator.
This isn't an ideal solution: we can't unconditionally issue a restart
after a transaction commit, because that would break other codepaths.
Reported-by: syzbot+78d82470c16a49702682@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_background.c | 2 +-
fs/bcachefs/errcode.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 8846daaa1162..79af226ca609 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1384,7 +1384,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
ret = bch2_btree_bit_mod_iter(trans, iter, false) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc) ?:
- 1;
+ -BCH_ERR_transaction_restart_commit;
goto out;
}
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index a12050e9c191..a0cfc0f286f4 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -148,6 +148,7 @@
x(BCH_ERR_transaction_restart, transaction_restart_split_race) \
x(BCH_ERR_transaction_restart, transaction_restart_write_buffer_flush) \
x(BCH_ERR_transaction_restart, transaction_restart_nested) \
+ x(BCH_ERR_transaction_restart, transaction_restart_commit) \
x(0, no_btree_node) \
x(BCH_ERR_no_btree_node, no_btree_node_relock) \
x(BCH_ERR_no_btree_node, no_btree_node_upgrade) \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 23/34] bcachefs: Guard against backpointers to unknown btrees
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (21 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 22/34] bcachefs: Issue a transaction restart after commit in repair Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 24/34] bcachefs: Fix journal_iter list corruption Kent Overstreet
` (10 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+997f0573004dcb964555
Reported-by: syzbot+997f0573004dcb964555@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/backpointers.c | 7 +++++--
fs/bcachefs/sb-errors_format.h | 2 +-
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index ff08afd667a0..702bf62d7fa7 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -249,9 +249,12 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
struct btree_iter *iter,
unsigned iter_flags)
{
- if (likely(!bp.v->level)) {
- struct bch_fs *c = trans->c;
+ struct bch_fs *c = trans->c;
+ if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c)))
+ return bkey_s_c_null;
+
+ if (likely(!bp.v->level)) {
bch2_trans_node_iter_init(trans, iter,
bp.v->btree_id,
bp.v->pos,
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 917ef6aa4a23..e73d1c60198e 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -67,7 +67,7 @@ enum bch_fsck_flags {
x(btree_node_bkey_past_bset_end, 54, 0) \
x(btree_node_bkey_bad_format, 55, 0) \
x(btree_node_bad_bkey, 56, 0) \
- x(btree_node_bkey_out_of_order, 57, 0) \
+ x(btree_node_bkey_out_of_order, 57, FSCK_AUTOFIX) \
x(btree_root_bkey_invalid, 58, FSCK_AUTOFIX) \
x(btree_root_read_error, 59, FSCK_AUTOFIX) \
x(btree_root_bad_min_key, 60, 0) \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 24/34] bcachefs: Fix journal_iter list corruption
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (22 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 23/34] bcachefs: Guard against backpointers to unknown btrees Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 25/34] bcachefs: add missing printbuf_reset() Kent Overstreet
` (9 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+2f7c2225ed8a5cb24af1
Fix exiting an iterator that wasn't initialized.
Reported-by: syzbot+2f7c2225ed8a5cb24af1@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_update_interior.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 56a70e95ef9a..5eabd532e388 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -58,6 +58,10 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
!bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
b->data->min_key));
+ bch2_bkey_buf_init(&prev);
+ bkey_init(&prev.k->k);
+ bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+
if (b == btree_node_root(c, b)) {
if (!bpos_eq(b->data->min_key, POS_MIN)) {
printbuf_reset(&buf);
@@ -77,11 +81,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
}
if (!b->c.level)
- return 0;
-
- bch2_bkey_buf_init(&prev);
- bkey_init(&prev.k->k);
- bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+ goto out;
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (k.k->type != KEY_TYPE_btree_ptr_v2)
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 25/34] bcachefs: add missing printbuf_reset()
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (23 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 24/34] bcachefs: Fix journal_iter list corruption Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 26/34] bcachefs: mark more errors AUTOFIX Kent Overstreet
` (8 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_gc.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 19db4d8aca88..e59924cfe2bc 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -521,6 +521,7 @@ int bch2_check_topology(struct bch_fs *c)
struct btree_root *r = bch2_btree_id_root(c, i);
bool reconstructed_root = false;
+ printbuf_reset(&buf);
bch2_btree_id_to_text(&buf, i);
if (r->error) {
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 26/34] bcachefs: mark more errors AUTOFIX
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (24 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 25/34] bcachefs: add missing printbuf_reset() Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 27/34] bcachefs: Don't error out when logging fsck error Kent Overstreet
` (7 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
mark errors as autofix where syzbot has hit the repair paths
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/sb-errors_format.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index e73d1c60198e..382fcafa815a 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -124,9 +124,9 @@ enum bch_fsck_flags {
x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \
x(bucket_sector_count_overflow, 112, 0) \
x(bucket_metadata_type_mismatch, 113, 0) \
- x(need_discard_key_wrong, 114, 0) \
- x(freespace_key_wrong, 115, 0) \
- x(freespace_hole_missing, 116, 0) \
+ x(need_discard_key_wrong, 114, FSCK_AUTOFIX) \
+ x(freespace_key_wrong, 115, FSCK_AUTOFIX) \
+ x(freespace_hole_missing, 116, FSCK_AUTOFIX) \
x(bucket_gens_val_size_bad, 117, 0) \
x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \
x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \
@@ -288,7 +288,7 @@ enum bch_fsck_flags {
x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \
x(snapshot_node_missing, 264, 0) \
x(dup_backpointer_to_bad_csum_extent, 265, 0) \
- x(btree_bitmap_not_marked, 266, 0) \
+ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \
x(sb_clean_entry_overrun, 267, 0) \
x(btree_ptr_v2_written_0, 268, 0) \
x(subvol_snapshot_bad, 269, 0) \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 27/34] bcachefs: Don't error out when logging fsck error
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (25 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 26/34] bcachefs: mark more errors AUTOFIX Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 28/34] bcachefs: do_fsck_ask_yn() Kent Overstreet
` (6 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_background.c | 8 +++++---
fs/bcachefs/error.c | 29 +++++++++++++++++------------
fs/bcachefs/error.h | 14 +++++++++-----
3 files changed, 31 insertions(+), 20 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 79af226ca609..6de0387ede99 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -676,6 +676,10 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
set ? "" : "un",
bch2_btree_id_str(btree),
buf.buf);
+ if (ret == -BCH_ERR_fsck_ignore ||
+ ret == -BCH_ERR_fsck_errors_not_fixed)
+ ret = 0;
+
printbuf_exit(&buf);
return ret;
}
@@ -1901,10 +1905,8 @@ static int bch2_do_discards_fast_one(struct btree_trans *trans,
if (log_fsck_err_on(discard_k.k->type != KEY_TYPE_set,
trans, discarding_bucket_not_in_need_discard_btree,
"attempting to discard bucket %u:%llu not in need_discard btree",
- ca->dev_idx, bucket)) {
- /* log it in the superblock and continue: */
+ ca->dev_idx, bucket))
goto out;
- }
ret = bch2_discard_one_bucket(trans, ca, &need_discard_iter, discard_pos_done, s, true);
out:
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 5b67361b0cf1..23b9ecbcf3cf 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -227,7 +227,7 @@ int __bch2_fsck_err(struct bch_fs *c,
{
struct fsck_err_state *s = NULL;
va_list args;
- bool print = true, suppressing = false, inconsistent = false;
+ bool print = true, suppressing = false, inconsistent = false, exiting = false;
struct printbuf buf = PRINTBUF, *out = &buf;
int ret = -BCH_ERR_fsck_ignore;
const char *action_orig = "fix?", *action = action_orig;
@@ -320,13 +320,19 @@ int __bch2_fsck_err(struct bch_fs *c,
prt_printf(out, bch2_log_msg(c, ""));
#endif
- if ((flags & FSCK_CAN_FIX) &&
- (flags & FSCK_AUTOFIX) &&
+ if ((flags & FSCK_AUTOFIX) &&
(c->opts.errors == BCH_ON_ERROR_continue ||
c->opts.errors == BCH_ON_ERROR_fix_safe)) {
prt_str(out, ", ");
- prt_actioning(out, action);
- ret = -BCH_ERR_fsck_fix;
+ if (flags & FSCK_CAN_FIX) {
+ prt_actioning(out, action);
+ ret = -BCH_ERR_fsck_fix;
+ } else {
+ prt_str(out, ", continuing");
+ ret = -BCH_ERR_fsck_ignore;
+ }
+
+ goto print;
} else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
@@ -396,14 +402,13 @@ int __bch2_fsck_err(struct bch_fs *c,
!(flags & FSCK_CAN_IGNORE)))
ret = -BCH_ERR_fsck_errors_not_fixed;
- bool exiting =
- test_bit(BCH_FS_fsck_running, &c->flags) &&
- (ret != -BCH_ERR_fsck_fix &&
- ret != -BCH_ERR_fsck_ignore);
-
- if (exiting)
+ if (test_bit(BCH_FS_fsck_running, &c->flags) &&
+ (ret != -BCH_ERR_fsck_fix &&
+ ret != -BCH_ERR_fsck_ignore)) {
+ exiting = true;
print = true;
-
+ }
+print:
if (print) {
if (bch2_fs_stdio_redirect(c))
bch2_print(c, "%s\n", out->buf);
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 3b278f28e56b..12ca5287e20a 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -45,12 +45,11 @@ int bch2_topology_error(struct bch_fs *);
bch2_inconsistent_error(c); \
})
-#define bch2_fs_inconsistent_on(cond, c, ...) \
+#define bch2_fs_inconsistent_on(cond, ...) \
({ \
bool _ret = unlikely(!!(cond)); \
- \
if (_ret) \
- bch2_fs_inconsistent(c, __VA_ARGS__); \
+ bch2_fs_inconsistent(__VA_ARGS__); \
_ret; \
})
@@ -146,8 +145,13 @@ void bch2_flush_fsck_errs(struct bch_fs *);
#define log_fsck_err(c, _err_type, ...) \
__fsck_err(c, FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
-#define log_fsck_err_on(cond, c, _err_type, ...) \
- __fsck_err_on(cond, c, FSCK_CAN_IGNORE, _err_type, __VA_ARGS__)
+#define log_fsck_err_on(cond, ...) \
+({ \
+ bool _ret = unlikely(!!(cond)); \
+ if (_ret) \
+ log_fsck_err(__VA_ARGS__); \
+ _ret; \
+})
enum bch_validate_flags;
__printf(5, 6)
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 28/34] bcachefs: do_fsck_ask_yn()
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (26 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 27/34] bcachefs: Don't error out when logging fsck error Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 29/34] bcachefs: Check for bucket journal seq in the future Kent Overstreet
` (5 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
__bch2_fsck_err() is huge, and badly needs more refactoring
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/error.c | 59 ++++++++++++++++++++++++++-------------------
1 file changed, 34 insertions(+), 25 deletions(-)
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 23b9ecbcf3cf..0517782ca57a 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -219,6 +219,30 @@ static const u8 fsck_flags_extra[] = {
#undef x
};
+static int do_fsck_ask_yn(struct bch_fs *c,
+ struct btree_trans *trans,
+ struct printbuf *question,
+ const char *action)
+{
+ prt_str(question, ", ");
+ prt_str(question, action);
+
+ if (bch2_fs_stdio_redirect(c))
+ bch2_print(c, "%s", question->buf);
+ else
+ bch2_print_string_as_lines(KERN_ERR, question->buf);
+
+ int ask = bch2_fsck_ask_yn(c, trans);
+
+ if (trans) {
+ int ret = bch2_trans_relock(trans);
+ if (ret)
+ return ret;
+ }
+
+ return ask;
+}
+
int __bch2_fsck_err(struct bch_fs *c,
struct btree_trans *trans,
enum bch_fsck_flags flags,
@@ -291,16 +315,14 @@ int __bch2_fsck_err(struct bch_fs *c,
*/
if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
ret = s->ret;
- mutex_unlock(&c->fsck_error_msgs_lock);
- goto err;
+ goto err_unlock;
}
kfree(s->last_msg);
s->last_msg = kstrdup(buf.buf, GFP_KERNEL);
if (!s->last_msg) {
- mutex_unlock(&c->fsck_error_msgs_lock);
ret = -ENOMEM;
- goto err;
+ goto err_unlock;
}
if (c->opts.ratelimit_errors &&
@@ -356,31 +378,18 @@ int __bch2_fsck_err(struct bch_fs *c,
: c->opts.fix_errors;
if (fix == FSCK_FIX_ask) {
- prt_str(out, ", ");
- prt_str(out, action);
-
- if (bch2_fs_stdio_redirect(c))
- bch2_print(c, "%s", out->buf);
- else
- bch2_print_string_as_lines(KERN_ERR, out->buf);
print = false;
- int ask = bch2_fsck_ask_yn(c, trans);
-
- if (trans) {
- ret = bch2_trans_relock(trans);
- if (ret) {
- mutex_unlock(&c->fsck_error_msgs_lock);
- goto err;
- }
- }
+ ret = do_fsck_ask_yn(c, trans, out, action);
+ if (ret < 0)
+ goto err_unlock;
- if (ask >= YN_ALLNO && s)
- s->fix = ask == YN_ALLNO
+ if (ret >= YN_ALLNO && s)
+ s->fix = ret == YN_ALLNO
? FSCK_FIX_no
: FSCK_FIX_yes;
- ret = ask & 1
+ ret = ret & 1
? -BCH_ERR_fsck_fix
: -BCH_ERR_fsck_ignore;
} else if (fix == FSCK_FIX_yes ||
@@ -424,8 +433,6 @@ int __bch2_fsck_err(struct bch_fs *c,
if (s)
s->ret = ret;
- mutex_unlock(&c->fsck_error_msgs_lock);
-
if (inconsistent)
bch2_inconsistent_error(c);
@@ -442,6 +449,8 @@ int __bch2_fsck_err(struct bch_fs *c,
set_bit(BCH_FS_error, &c->flags);
}
}
+err_unlock:
+ mutex_unlock(&c->fsck_error_msgs_lock);
err:
if (action != action_orig)
kfree(action);
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 29/34] bcachefs: Check for bucket journal seq in the future
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (27 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 28/34] bcachefs: do_fsck_ask_yn() Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 30/34] bcachefs: Check for inode " Kent Overstreet
` (4 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+85700120f75fc10d4e18
This fixes an assertion pop in bch2_journal_noflush_seq() - log the
error to the superblock and continue instead.
Reported-by: syzbot+85700120f75fc10d4e18@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_background.c | 63 +++++++++++++++++++---------------
fs/bcachefs/sb-errors_format.h | 3 +-
2 files changed, 37 insertions(+), 29 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 6de0387ede99..e8c246e5803c 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -926,37 +926,43 @@ int bch2_trigger_alloc(struct btree_trans *trans,
}
if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
- u64 journal_seq = trans->journal_res.seq;
- u64 bucket_journal_seq = new_a->journal_seq;
+ u64 transaction_seq = trans->journal_res.seq;
- if ((flags & BTREE_TRIGGER_insert) &&
- data_type_is_empty(old_a->data_type) !=
- data_type_is_empty(new_a->data_type) &&
- new.k->type == KEY_TYPE_alloc_v4) {
- struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v;
+ if (log_fsck_err_on(transaction_seq && new_a->journal_seq > transaction_seq,
+ trans, alloc_key_journal_seq_in_future,
+ "bucket journal seq in future (currently at %llu)\n%s",
+ journal_cur_seq(&c->journal),
+ (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)))
+ new_a->journal_seq = transaction_seq;
- /*
- * If the btree updates referring to a bucket weren't flushed
- * before the bucket became empty again, then the we don't have
- * to wait on a journal flush before we can reuse the bucket:
- */
- v->journal_seq = bucket_journal_seq =
- data_type_is_empty(new_a->data_type) &&
- (journal_seq == v->journal_seq ||
- bch2_journal_noflush_seq(&c->journal, v->journal_seq))
- ? 0 : journal_seq;
- }
+ int is_empty_delta = (int) data_type_is_empty(new_a->data_type) -
+ (int) data_type_is_empty(old_a->data_type);
- if (!data_type_is_empty(old_a->data_type) &&
- data_type_is_empty(new_a->data_type) &&
- bucket_journal_seq) {
- ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
- c->journal.flushed_seq_ondisk,
- new.k->p.inode, new.k->p.offset,
- bucket_journal_seq);
- if (bch2_fs_fatal_err_on(ret, c,
- "setting bucket_needs_journal_commit: %s", bch2_err_str(ret)))
- goto err;
+ /* Record journal sequence number of empty -> nonempty transition: */
+ if (is_empty_delta < 0)
+ new_a->journal_seq = max(new_a->journal_seq, transaction_seq);
+
+ /*
+ * Bucket becomes empty: mark it as waiting for a journal flush,
+ * unless updates since empty -> nonempty transition were never
+ * flushed - we may need to ask the journal not to flush
+ * intermediate sequence numbers:
+ */
+ if (is_empty_delta > 0) {
+ if (new_a->journal_seq == transaction_seq ||
+ bch2_journal_noflush_seq(&c->journal, new_a->journal_seq))
+ new_a->journal_seq = 0;
+ else {
+ new_a->journal_seq = transaction_seq;
+
+ ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+ c->journal.flushed_seq_ondisk,
+ new.k->p.inode, new.k->p.offset,
+ transaction_seq);
+ if (bch2_fs_fatal_err_on(ret, c,
+ "setting bucket_needs_journal_commit: %s", bch2_err_str(ret)))
+ goto err;
+ }
}
if (new_a->gen != old_a->gen) {
@@ -1004,6 +1010,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
rcu_read_unlock();
}
err:
+fsck_err:
printbuf_exit(&buf);
bch2_dev_put(ca);
return ret;
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 382fcafa815a..8e3a6c5da10d 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -122,6 +122,7 @@ enum bch_fsck_flags {
x(alloc_key_cached_sectors_wrong, 109, FSCK_AUTOFIX) \
x(alloc_key_stripe_wrong, 110, FSCK_AUTOFIX) \
x(alloc_key_stripe_redundancy_wrong, 111, FSCK_AUTOFIX) \
+ x(alloc_key_journal_seq_in_future, 298, FSCK_AUTOFIX) \
x(bucket_sector_count_overflow, 112, 0) \
x(bucket_metadata_type_mismatch, 113, 0) \
x(need_discard_key_wrong, 114, FSCK_AUTOFIX) \
@@ -308,7 +309,7 @@ enum bch_fsck_flags {
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \
x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \
- x(MAX, 298, 0)
+ x(MAX, 299, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 30/34] bcachefs: Check for inode journal seq in the future
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (28 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 29/34] bcachefs: Check for bucket journal seq in the future Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 31/34] bcachefs: cryptographic MACs on superblock are not (yet?) supported Kent Overstreet
` (3 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+d119b445ec739e7f3068
More check and repair code: this fixes a warning in
bch2_journal_flush_seq_async()
Reported-by: syzbot+d119b445ec739e7f3068@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/fs-io.c | 35 +++++++++++++++++++++++++++++++---
fs/bcachefs/fsck.c | 13 ++++++++++++-
fs/bcachefs/sb-errors_format.h | 3 ++-
3 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index c6fdfec51082..33d0e7080bf6 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -167,6 +167,34 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
/* fsync: */
+static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_inum inum,
+ u64 *seq)
+{
+ struct printbuf buf = PRINTBUF;
+ struct bch_inode_unpacked u;
+ struct btree_iter iter;
+ int ret = bch2_inode_peek(trans, &iter, &u, inum, 0);
+ if (ret)
+ return ret;
+
+ u64 cur_seq = journal_cur_seq(&trans->c->journal);
+ *seq = min(cur_seq, u.bi_journal_seq);
+
+ if (fsck_err_on(u.bi_journal_seq > cur_seq,
+ trans, inode_journal_seq_in_future,
+ "inode journal seq in future (currently at %llu)\n%s",
+ cur_seq,
+ (bch2_inode_unpacked_to_text(&buf, &u),
+ buf.buf))) {
+ u.bi_journal_seq = cur_seq;
+ ret = bch2_inode_write(trans, &iter, &u);
+ }
+fsck_err:
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+ return ret;
+}
+
/*
* inode->ei_inode.bi_journal_seq won't be up to date since it's set in an
* insert trigger: look up the btree inode instead
@@ -180,9 +208,10 @@ static int bch2_flush_inode(struct bch_fs *c,
if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
return -EROFS;
- struct bch_inode_unpacked u;
- int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?:
- bch2_journal_flush_seq(&c->journal, u.bi_journal_seq, TASK_INTERRUPTIBLE) ?:
+ u64 seq;
+ int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+ bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?:
+ bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?:
bch2_inode_flush_nocow_writes(c, inode);
bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
return ret;
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index e10abd2e6c69..f2174528ee5f 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -1392,7 +1392,7 @@ static int check_inode(struct btree_trans *trans,
if (fsck_err_on(!ret,
trans, inode_unlinked_and_not_open,
- "inode %llu%u unlinked and not open",
+ "inode %llu:%u unlinked and not open",
u.bi_inum, u.bi_snapshot)) {
ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck deleting inode");
@@ -1441,6 +1441,17 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
}
+
+ if (fsck_err_on(u.bi_journal_seq > journal_cur_seq(&c->journal),
+ trans, inode_journal_seq_in_future,
+ "inode journal seq in future (currently at %llu)\n%s",
+ journal_cur_seq(&c->journal),
+ (printbuf_reset(&buf),
+ bch2_inode_unpacked_to_text(&buf, &u),
+ buf.buf))) {
+ u.bi_journal_seq = journal_cur_seq(&c->journal);
+ do_update = true;
+ }
do_update:
if (do_update) {
ret = __bch2_fsck_write_inode(trans, &u);
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 8e3a6c5da10d..342eda8ab69f 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -233,6 +233,7 @@ enum bch_fsck_flags {
x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \
x(inode_has_child_snapshots_wrong, 287, 0) \
x(inode_unreachable, 210, FSCK_AUTOFIX) \
+ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \
x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \
x(deleted_inode_missing, 212, FSCK_AUTOFIX) \
x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \
@@ -309,7 +310,7 @@ enum bch_fsck_flags {
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \
x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \
- x(MAX, 299, 0)
+ x(MAX, 300, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 31/34] bcachefs: cryptographic MACs on superblock are not (yet?) supported
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (29 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 30/34] bcachefs: Check for inode " Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 32/34] bcachefs: bch2_trans_relock() is trylock for lockdep Kent Overstreet
` (2 subsequent siblings)
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+dd3d9835055dacb66f35
We should add support for cryptographic macs on the superblock - and it
won't be hard, but it'll need an incompatible feature bit (and we have a
new incompatible feature versioning scheme coming).
For now, just add a guard to avoid a null ptr deref in gen_poly_key().
Reported-by: syzbot+dd3d9835055dacb66f35@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/super-io.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 4c29f8215d54..6a086c1c4b14 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -677,7 +677,8 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf
}
enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
- if (csum_type >= BCH_CSUM_NR) {
+ if (csum_type >= BCH_CSUM_NR ||
+ bch2_csum_type_is_encryption(csum_type)) {
prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
return -BCH_ERR_invalid_sb_csum_type;
}
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 32/34] bcachefs: bch2_trans_relock() is trylock for lockdep
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (30 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 31/34] bcachefs: cryptographic MACs on superblock are not (yet?) supported Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 33/34] bcachefs: Check for extent crc uncompressed/compressed size mismatch Kent Overstreet
2024-11-29 20:27 ` [PATCH 34/34] bcachefs: Don't recurse in check_discard_freespace_key Kent Overstreet
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+e088be3c2d5c05aaac35
fix some spurious lockdep splats
Reported-by: syzbot+e088be3c2d5c05aaac35@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/btree_iter.c | 8 ++++----
fs/bcachefs/btree_locking.c | 2 +-
fs/bcachefs/btree_locking.h | 4 ++--
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 80c3b55ce763..9c54891c737a 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1007,7 +1007,7 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
bch2_trans_unlock(trans);
cond_resched();
- trans_set_locked(trans);
+ trans_set_locked(trans, false);
if (unlikely(trans->memory_allocation_failure)) {
struct closure cl;
@@ -3248,7 +3248,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->last_begin_ip = _RET_IP_;
- trans_set_locked(trans);
+ trans_set_locked(trans, false);
if (trans->restarted) {
bch2_btree_path_traverse_all(trans);
@@ -3354,7 +3354,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
trans->srcu_lock_time = jiffies;
trans->srcu_held = true;
- trans_set_locked(trans);
+ trans_set_locked(trans, false);
closure_init_stack_release(&trans->ref);
return trans;
@@ -3622,7 +3622,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
#ifdef CONFIG_LOCKDEP
fs_reclaim_acquire(GFP_KERNEL);
struct btree_trans *trans = bch2_trans_get(c);
- trans_set_locked(trans);
+ trans_set_locked(trans, false);
bch2_trans_put(trans);
fs_reclaim_release(GFP_KERNEL);
#endif
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index efe2a007b482..d343df9f0ad2 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -782,7 +782,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
return bch2_trans_relock_fail(trans, path, &f, trace);
}
- trans_set_locked(trans);
+ trans_set_locked(trans, true);
out:
bch2_trans_verify_locks(trans);
return 0;
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index ca4aeefd631e..7474ab6ce019 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -188,10 +188,10 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p);
/* lock: */
-static inline void trans_set_locked(struct btree_trans *trans)
+static inline void trans_set_locked(struct btree_trans *trans, bool try)
{
if (!trans->locked) {
- lock_acquire_exclusive(&trans->dep_map, 0, 0, NULL, _THIS_IP_);
+ lock_acquire_exclusive(&trans->dep_map, 0, try, NULL, _THIS_IP_);
trans->locked = true;
trans->last_unlock_ip = 0;
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 33/34] bcachefs: Check for extent crc uncompressed/compressed size mismatch
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (31 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 32/34] bcachefs: bch2_trans_relock() is trylock for lockdep Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
2024-11-29 20:27 ` [PATCH 34/34] bcachefs: Don't recurse in check_discard_freespace_key Kent Overstreet
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet, syzbot+50d3544c9b8db9c99fd2
When not compressed, these must be equal - this fixes an assertion pop
in bch2_rechecksum_bio().
Reported-by: syzbot+50d3544c9b8db9c99fd2@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/extents.c | 22 +++++++++++++---------
fs/bcachefs/sb-errors_format.h | 5 +++--
2 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index aa3b88291814..2fc9ace5533c 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -1323,9 +1323,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
case BCH_EXTENT_ENTRY_crc128:
crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
- bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size,
- c, ptr_crc_uncompressed_size_too_small,
- "checksum offset + key size > uncompressed size");
bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type),
c, ptr_crc_csum_type_unknown,
"invalid checksum type");
@@ -1333,6 +1330,19 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
c, ptr_crc_compression_type_unknown,
"invalid compression type");
+ bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size,
+ c, ptr_crc_uncompressed_size_too_small,
+ "checksum offset + key size > uncompressed size");
+ bkey_fsck_err_on(crc_is_encoded(crc) &&
+ (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
+ (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)),
+ c, ptr_crc_uncompressed_size_too_big,
+ "too large encoded extent");
+ bkey_fsck_err_on(!crc_is_compressed(crc) &&
+ crc.compressed_size != crc.uncompressed_size,
+ c, ptr_crc_uncompressed_size_mismatch,
+ "not compressed but compressed != uncompressed size");
+
if (bch2_csum_type_is_encryption(crc.csum_type)) {
if (nonce == UINT_MAX)
nonce = crc.offset + crc.nonce;
@@ -1346,12 +1356,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k,
"redundant crc entry");
crc_since_last_ptr = true;
- bkey_fsck_err_on(crc_is_encoded(crc) &&
- (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
- (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)),
- c, ptr_crc_uncompressed_size_too_big,
- "too large encoded extent");
-
size_ondisk = crc.compressed_size;
break;
case BCH_EXTENT_ENTRY_stripe_ptr:
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 342eda8ab69f..3bbda181f314 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -172,10 +172,11 @@ enum bch_fsck_flags {
x(ptr_bucket_data_type_mismatch, 155, 0) \
x(ptr_cached_and_erasure_coded, 156, 0) \
x(ptr_crc_uncompressed_size_too_small, 157, 0) \
+ x(ptr_crc_uncompressed_size_too_big, 161, 0) \
+ x(ptr_crc_uncompressed_size_mismatch, 300, 0) \
x(ptr_crc_csum_type_unknown, 158, 0) \
x(ptr_crc_compression_type_unknown, 159, 0) \
x(ptr_crc_redundant, 160, 0) \
- x(ptr_crc_uncompressed_size_too_big, 161, 0) \
x(ptr_crc_nonce_mismatch, 162, 0) \
x(ptr_stripe_redundant, 163, 0) \
x(reservation_key_nr_replicas_invalid, 164, 0) \
@@ -310,7 +311,7 @@ enum bch_fsck_flags {
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \
x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \
- x(MAX, 300, 0)
+ x(MAX, 301, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread* [PATCH 34/34] bcachefs: Don't recurse in check_discard_freespace_key
2024-11-29 20:26 [PATCH 00/34] a whole raft of bugfixes Kent Overstreet
` (32 preceding siblings ...)
2024-11-29 20:27 ` [PATCH 33/34] bcachefs: Check for extent crc uncompressed/compressed size mismatch Kent Overstreet
@ 2024-11-29 20:27 ` Kent Overstreet
33 siblings, 0 replies; 37+ messages in thread
From: Kent Overstreet @ 2024-11-29 20:27 UTC (permalink / raw)
To: linux-bcachefs; +Cc: Kent Overstreet
When calling check_discard_freespace_key from the allocator, we can't
repair without recursing - run it asynchronously instead.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_background.c | 72 ++++++++++++++++++++++++++++++----
fs/bcachefs/alloc_background.h | 2 +-
fs/bcachefs/alloc_foreground.c | 2 +-
fs/bcachefs/bcachefs.h | 1 +
4 files changed, 67 insertions(+), 10 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index e8c246e5803c..b2d570453351 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1338,7 +1338,40 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
return ret;
}
-int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen)
+struct check_discard_freespace_key_async {
+ struct work_struct work;
+ struct bch_fs *c;
+ struct bbpos pos;
+};
+
+static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ u8 gen;
+ ret = k.k->type != KEY_TYPE_set
+ ? bch2_check_discard_freespace_key(trans, &iter, &gen, false)
+ : 0;
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static void check_discard_freespace_key_work(struct work_struct *work)
+{
+ struct check_discard_freespace_key_async *w =
+ container_of(work, struct check_discard_freespace_key_async, work);
+
+ bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos));
+ bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key);
+ kfree(w);
+}
+
+int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen,
+ bool async_repair)
{
struct bch_fs *c = trans->c;
enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
@@ -1351,7 +1384,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
u64 genbits = iter->pos.offset & (~0ULL << 56);
struct btree_iter alloc_iter;
- struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, BTREE_ITER_cached);
+ struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter,
+ BTREE_ID_alloc, bucket, BTREE_ITER_cached);
int ret = bkey_err(alloc_k);
if (ret)
return ret;
@@ -1392,17 +1426,39 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
printbuf_exit(&buf);
return ret;
delete:
- ret = bch2_btree_bit_mod_iter(trans, iter, false) ?:
- bch2_trans_commit(trans, NULL, NULL,
- BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_commit;
- goto out;
+ if (!async_repair) {
+ ret = bch2_btree_bit_mod_iter(trans, iter, false) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc) ?:
+ -BCH_ERR_transaction_restart_commit;
+ goto out;
+ } else {
+ /*
+ * We can't repair here when called from the allocator path: the
+ * commit will recurse back into the allocator
+ */
+ struct check_discard_freespace_key_async *w =
+ kzalloc(sizeof(*w), GFP_KERNEL);
+ if (!w)
+ goto out;
+
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) {
+ kfree(w);
+ goto out;
+ }
+
+ INIT_WORK(&w->work, check_discard_freespace_key_work);
+ w->c = c;
+ w->pos = BBPOS(iter->btree_id, iter->pos);
+ queue_work(c->write_ref_wq, &w->work);
+ goto out;
+ }
}
static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter)
{
u8 gen;
- int ret = bch2_check_discard_freespace_key(trans, iter, &gen);
+ int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false);
return ret < 0 ? ret : 0;
}
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index 8cacddd188f4..de25ba4ee94b 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -310,7 +310,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
struct bkey_s_c, struct bkey_s,
enum btree_iter_update_trigger_flags);
-int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *);
+int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool);
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_dev_do_discards(struct bch_dev *);
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 4d1ff7f1f302..c40a76df76b8 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -281,7 +281,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
u64 b = freespace_iter->pos.offset & ~(~0ULL << 56);
u8 gen;
- int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen);
+ int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true);
if (ret < 0)
return ERR_PTR(ret);
if (ret)
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index d88129503bc5..c16937e54734 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -680,6 +680,7 @@ struct btree_trans_buf {
x(dio_write) \
x(discard) \
x(discard_fast) \
+ x(check_discard_freespace_key) \
x(invalidate) \
x(delete_dead_snapshots) \
x(gc_gens) \
--
2.45.2
^ permalink raw reply related [flat|nested] 37+ messages in thread