From: Kent Overstreet <kent.overstreet@linux.dev>
To: linux-bcachefs@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Subject: [PATCH 12/18] bcachefs: bcachefs_metadata_version_stripe_lru
Date: Thu, 13 Feb 2025 13:45:57 -0500 [thread overview]
Message-ID: <20250213184607.18237-13-kent.overstreet@linux.dev> (raw)
In-Reply-To: <20250213184607.18237-1-kent.overstreet@linux.dev>
Add a persistent LRU for stripes, ordered by "number of empty blocks",
i.e. order in which we wish to reuse them.
This will replace the in-memory stripes heap, so we can kill off reading
stripes into memory at startup.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_background.c | 3 +-
fs/bcachefs/bcachefs_format.h | 3 +-
fs/bcachefs/ec.c | 51 ++++++++++++++++++++++++++++++++++
fs/bcachefs/ec.h | 27 ++++++++++++++++++
fs/bcachefs/lru.c | 7 +++++
fs/bcachefs/lru.h | 9 ++++--
fs/bcachefs/lru_format.h | 4 ++-
7 files changed, 99 insertions(+), 5 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c5c8497a6339..ecad4a78c3f7 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1757,7 +1757,8 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
+ bch2_check_stripe_to_lru_refs(c);
bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index bf3723a2bca4..b4ac311f21a1 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -688,7 +688,8 @@ struct bch_sb_field_ext {
x(autofix_errors, BCH_VERSION(1, 19)) \
x(directory_size, BCH_VERSION(1, 20)) \
x(cached_backpointers, BCH_VERSION(1, 21)) \
- x(stripe_backpointers, BCH_VERSION(1, 22))
+ x(stripe_backpointers, BCH_VERSION(1, 22)) \
+ x(stripe_lru, BCH_VERSION(1, 23))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 36590c0ce09f..1090cdb7d5cc 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -20,6 +20,7 @@
#include "io_read.h"
#include "io_write.h"
#include "keylist.h"
+#include "lru.h"
#include "recovery.h"
#include "replicas.h"
#include "super-io.h"
@@ -411,6 +412,15 @@ int bch2_trigger_stripe(struct btree_trans *trans,
(new_s->nr_blocks != old_s->nr_blocks ||
new_s->nr_redundant != old_s->nr_redundant));
+ if (flags & BTREE_TRIGGER_transactional) {
+ int ret = bch2_lru_change(trans,
+ BCH_LRU_STRIPE_FRAGMENTATION,
+ idx,
+ stripe_lru_pos(old_s),
+ stripe_lru_pos(new_s));
+ if (ret)
+ return ret;
+ }
if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
/*
@@ -1175,6 +1185,10 @@ static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
return ret;
}
+/*
+ * XXX
+ * can we kill this and delete stripes from the trigger?
+ */
static void ec_stripe_delete_work(struct work_struct *work)
{
struct bch_fs *c =
@@ -2519,3 +2533,40 @@ int bch2_fs_ec_init(struct bch_fs *c)
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS);
}
+
+static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct bkey_buf *last_flushed)
+{
+ if (k.k->type != KEY_TYPE_stripe)
+ return 0;
+
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+ u64 lru_idx = stripe_lru_pos(s.v);
+ if (lru_idx) {
+ int ret = bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION,
+ k.k->p.offset, lru_idx, k, last_flushed);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int bch2_check_stripe_to_lru_refs(struct bch_fs *c)
+{
+ struct bkey_buf last_flushed;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
+ POS_MIN, BTREE_ITER_prefetch, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
+ bch_err_fn(c, ret);
+ return ret;
+}
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 4c9511887655..cd1c837e4933 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -92,6 +92,31 @@ static inline void stripe_csum_set(struct bch_stripe *s,
memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
}
+#define STRIPE_LRU_POS_EMPTY 1
+
+static inline u64 stripe_lru_pos(const struct bch_stripe *s)
+{
+ if (!s)
+ return 0;
+
+ unsigned blocks_empty = 0, blocks_nonempty = 0;
+
+ for (unsigned i = 0; i < s->nr_blocks; i++) {
+ blocks_empty += !stripe_blockcount_get(s, i);
+ blocks_nonempty += !!stripe_blockcount_get(s, i);
+ }
+
+ /* Will be picked up by the stripe_delete worker */
+ if (!blocks_nonempty)
+ return STRIPE_LRU_POS_EMPTY;
+
+ if (!blocks_empty)
+ return 0;
+
+ /* invert: more blocks empty = reuse first */
+ return LRU_TIME_MAX - blocks_empty;
+}
+
static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
const struct bch_extent_ptr *data_ptr,
unsigned sectors)
@@ -282,4 +307,6 @@ void bch2_fs_ec_exit(struct bch_fs *);
void bch2_fs_ec_init_early(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *);
+int bch2_check_stripe_to_lru_refs(struct bch_fs *);
+
#endif /* _BCACHEFS_EC_H */
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 98ab8496f29d..a299d9ec8ee4 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -6,6 +6,7 @@
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
+#include "ec.h"
#include "error.h"
#include "lru.h"
#include "recovery.h"
@@ -124,6 +125,8 @@ static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k)
case BCH_LRU_read:
case BCH_LRU_fragmentation:
return BBPOS(BTREE_ID_alloc, u64_to_bucket(lru_k.k->p.offset));
+ case BCH_LRU_stripes:
+ return BBPOS(BTREE_ID_stripes, POS(0, lru_k.k->p.offset));
default:
BUG();
}
@@ -151,6 +154,10 @@ static u64 bkey_lru_type_idx(struct bch_fs *c,
rcu_read_unlock();
return idx;
}
+ case BCH_LRU_stripes:
+ return k.k->type == KEY_TYPE_stripe
+ ? stripe_lru_pos(bkey_s_c_to_stripe(k).v)
+ : 0;
default:
BUG();
}
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index dea1d75cc9c1..8abd0aa2083a 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -28,9 +28,14 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l)
{
u16 lru_id = l.k->p.inode >> 48;
- if (lru_id == BCH_LRU_BUCKET_FRAGMENTATION)
+ switch (lru_id) {
+ case BCH_LRU_BUCKET_FRAGMENTATION:
return BCH_LRU_fragmentation;
- return BCH_LRU_read;
+ case BCH_LRU_STRIPE_FRAGMENTATION:
+ return BCH_LRU_stripes;
+ default:
+ return BCH_LRU_read;
+ }
}
int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context);
diff --git a/fs/bcachefs/lru_format.h b/fs/bcachefs/lru_format.h
index 353a352d3fb9..b7392ad8e41f 100644
--- a/fs/bcachefs/lru_format.h
+++ b/fs/bcachefs/lru_format.h
@@ -9,7 +9,8 @@ struct bch_lru {
#define BCH_LRU_TYPES() \
x(read) \
- x(fragmentation)
+ x(fragmentation) \
+ x(stripes)
enum bch_lru_type {
#define x(n) BCH_LRU_##n,
@@ -18,6 +19,7 @@ enum bch_lru_type {
};
#define BCH_LRU_BUCKET_FRAGMENTATION ((1U << 16) - 1)
+#define BCH_LRU_STRIPE_FRAGMENTATION ((1U << 16) - 2)
#define LRU_TIME_BITS 48
#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
--
2.45.2
next prev parent reply other threads:[~2025-02-13 18:46 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-13 18:45 [PATCH 00/18] last on disk format changes before freeze Kent Overstreet
2025-02-13 18:45 ` [PATCH 01/18] bcachefs: bch2_lru_change() checks for no-op Kent Overstreet
2025-02-13 18:45 ` [PATCH 02/18] bcachefs: s/BCH_LRU_FRAGMENTATION_START/BCH_LRU_BUCKET_FRAGMENTATION/ Kent Overstreet
2025-02-13 18:45 ` [PATCH 03/18] bcachefs: decouple bch2_lru_check_set() from alloc btree Kent Overstreet
2025-02-13 18:45 ` [PATCH 04/18] bcachefs: Rework bch2_check_lru_key() Kent Overstreet
2025-02-13 18:45 ` [PATCH 05/18] bcachefs: bch2_trigger_stripe_ptr() no longer uses ec_stripes_heap_lock Kent Overstreet
2025-02-13 18:45 ` [PATCH 06/18] bcachefs: Better trigger ordering Kent Overstreet
2025-02-13 18:45 ` [PATCH 07/18] bcachefs: rework bch2_trans_commit_run_triggers() Kent Overstreet
2025-02-13 18:45 ` [PATCH 08/18] bcachefs: bcachefs_metadata_version_cached_backpointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 09/18] bcachefs: Invalidate cached data by backpointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 10/18] bcachefs: Advance bch_alloc.oldest_gen if no stale pointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 11/18] bcachefs: bcachefs_metadata_version_stripe_backpointers Kent Overstreet
2025-02-13 18:45 ` Kent Overstreet [this message]
2025-02-13 18:45 ` [PATCH 13/18] bcachefs: ec_stripe_delete() uses new stripe lru Kent Overstreet
2025-02-13 18:45 ` [PATCH 14/18] bcachefs: get_existing_stripe() " Kent Overstreet
2025-02-13 18:46 ` [PATCH 15/18] bcachefs: We no longer read stripes into memory at startup Kent Overstreet
2025-02-13 18:46 ` [PATCH 16/18] bcachefs: Kill dirent_occupied_size() Kent Overstreet
2025-02-17 1:49 ` Hongbo Li
2025-02-13 18:46 ` [PATCH 17/18] bcachefs: Split out dirent alloc and name initialization Kent Overstreet
2025-02-13 18:46 ` [PATCH 18/18] bcachefs: bcachefs_metadata_version_casefolding Kent Overstreet
2025-02-21 18:26 ` [PATCH] bcachefs: Use flexible arrays in dirent Gabriel de Perthuis
2025-02-22 14:07 ` Kent Overstreet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250213184607.18237-13-kent.overstreet@linux.dev \
--to=kent.overstreet@linux.dev \
--cc=linux-bcachefs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox