From: Kent Overstreet <kent.overstreet@linux.dev>
To: linux-bcachefs@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Subject: [PATCH 12/18] bcachefs: bcachefs_metadata_version_stripe_lru
Date: Thu, 13 Feb 2025 13:45:57 -0500 [thread overview]
Message-ID: <20250213184607.18237-13-kent.overstreet@linux.dev> (raw)
In-Reply-To: <20250213184607.18237-1-kent.overstreet@linux.dev>
Add a persistent LRU for stripes, ordered by "number of empty blocks",
i.e. the order in which we wish to reuse them.
This will replace the in-memory stripes heap, so we can kill off reading
stripes into memory at startup.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
fs/bcachefs/alloc_background.c | 3 +-
fs/bcachefs/bcachefs_format.h | 3 +-
fs/bcachefs/ec.c | 51 ++++++++++++++++++++++++++++++++++
fs/bcachefs/ec.h | 27 ++++++++++++++++++
fs/bcachefs/lru.c | 7 +++++
fs/bcachefs/lru.h | 9 ++++--
fs/bcachefs/lru_format.h | 4 ++-
7 files changed, 99 insertions(+), 5 deletions(-)
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c5c8497a6339..ecad4a78c3f7 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1757,7 +1757,8 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+ bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
+ bch2_check_stripe_to_lru_refs(c);
bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index bf3723a2bca4..b4ac311f21a1 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -688,7 +688,8 @@ struct bch_sb_field_ext {
x(autofix_errors, BCH_VERSION(1, 19)) \
x(directory_size, BCH_VERSION(1, 20)) \
x(cached_backpointers, BCH_VERSION(1, 21)) \
- x(stripe_backpointers, BCH_VERSION(1, 22))
+ x(stripe_backpointers, BCH_VERSION(1, 22)) \
+ x(stripe_lru, BCH_VERSION(1, 23))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 36590c0ce09f..1090cdb7d5cc 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -20,6 +20,7 @@
#include "io_read.h"
#include "io_write.h"
#include "keylist.h"
+#include "lru.h"
#include "recovery.h"
#include "replicas.h"
#include "super-io.h"
@@ -411,6 +412,15 @@ int bch2_trigger_stripe(struct btree_trans *trans,
(new_s->nr_blocks != old_s->nr_blocks ||
new_s->nr_redundant != old_s->nr_redundant));
+ if (flags & BTREE_TRIGGER_transactional) {
+ int ret = bch2_lru_change(trans,
+ BCH_LRU_STRIPE_FRAGMENTATION,
+ idx,
+ stripe_lru_pos(old_s),
+ stripe_lru_pos(new_s));
+ if (ret)
+ return ret;
+ }
if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
/*
@@ -1175,6 +1185,10 @@ static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
return ret;
}
+/*
+ * XXX
+ * can we kill ec_stripe_delete_work() and delete stripes from the trigger?
+ */
static void ec_stripe_delete_work(struct work_struct *work)
{
struct bch_fs *c =
@@ -2519,3 +2533,40 @@ int bch2_fs_ec_init(struct bch_fs *c)
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS);
}
+
+static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct bkey_buf *last_flushed)
+{
+ if (k.k->type != KEY_TYPE_stripe)
+ return 0;
+
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+ u64 lru_idx = stripe_lru_pos(s.v);
+ if (lru_idx) {
+ int ret = bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION,
+ k.k->p.offset, lru_idx, k, last_flushed);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int bch2_check_stripe_to_lru_refs(struct bch_fs *c)
+{
+ struct bkey_buf last_flushed;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
+ POS_MIN, BTREE_ITER_prefetch, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
+ bch_err_fn(c, ret);
+ return ret;
+}
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 4c9511887655..cd1c837e4933 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -92,6 +92,31 @@ static inline void stripe_csum_set(struct bch_stripe *s,
memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
}
+#define STRIPE_LRU_POS_EMPTY 1
+
+static inline u64 stripe_lru_pos(const struct bch_stripe *s)
+{
+ if (!s)
+ return 0;
+
+ unsigned blocks_empty = 0, blocks_nonempty = 0;
+
+ for (unsigned i = 0; i < s->nr_blocks; i++) {
+ blocks_empty += !stripe_blockcount_get(s, i);
+ blocks_nonempty += !!stripe_blockcount_get(s, i);
+ }
+
+ /* No nonempty blocks: will be picked up by the stripe_delete worker */
+ if (!blocks_nonempty)
+ return STRIPE_LRU_POS_EMPTY;
+
+ if (!blocks_empty)
+ return 0;
+
+ /* invert: more empty blocks = lower LRU position = reused first */
+ return LRU_TIME_MAX - blocks_empty;
+}
+
static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
const struct bch_extent_ptr *data_ptr,
unsigned sectors)
@@ -282,4 +307,6 @@ void bch2_fs_ec_exit(struct bch_fs *);
void bch2_fs_ec_init_early(struct bch_fs *);
int bch2_fs_ec_init(struct bch_fs *);
+int bch2_check_stripe_to_lru_refs(struct bch_fs *);
+
#endif /* _BCACHEFS_EC_H */
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 98ab8496f29d..a299d9ec8ee4 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -6,6 +6,7 @@
#include "btree_iter.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
+#include "ec.h"
#include "error.h"
#include "lru.h"
#include "recovery.h"
@@ -124,6 +125,8 @@ static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k)
case BCH_LRU_read:
case BCH_LRU_fragmentation:
return BBPOS(BTREE_ID_alloc, u64_to_bucket(lru_k.k->p.offset));
+ case BCH_LRU_stripes:
+ return BBPOS(BTREE_ID_stripes, POS(0, lru_k.k->p.offset));
default:
BUG();
}
@@ -151,6 +154,10 @@ static u64 bkey_lru_type_idx(struct bch_fs *c,
rcu_read_unlock();
return idx;
}
+ case BCH_LRU_stripes:
+ return k.k->type == KEY_TYPE_stripe
+ ? stripe_lru_pos(bkey_s_c_to_stripe(k).v)
+ : 0;
default:
BUG();
}
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index dea1d75cc9c1..8abd0aa2083a 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -28,9 +28,14 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l)
{
u16 lru_id = l.k->p.inode >> 48;
- if (lru_id == BCH_LRU_BUCKET_FRAGMENTATION)
+ switch (lru_id) {
+ case BCH_LRU_BUCKET_FRAGMENTATION:
return BCH_LRU_fragmentation;
- return BCH_LRU_read;
+ case BCH_LRU_STRIPE_FRAGMENTATION:
+ return BCH_LRU_stripes;
+ default:
+ return BCH_LRU_read;
+ }
}
int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context);
diff --git a/fs/bcachefs/lru_format.h b/fs/bcachefs/lru_format.h
index 353a352d3fb9..b7392ad8e41f 100644
--- a/fs/bcachefs/lru_format.h
+++ b/fs/bcachefs/lru_format.h
@@ -9,7 +9,8 @@ struct bch_lru {
#define BCH_LRU_TYPES() \
x(read) \
- x(fragmentation)
+ x(fragmentation) \
+ x(stripes)
enum bch_lru_type {
#define x(n) BCH_LRU_##n,
@@ -18,6 +19,7 @@ enum bch_lru_type {
};
#define BCH_LRU_BUCKET_FRAGMENTATION ((1U << 16) - 1)
+#define BCH_LRU_STRIPE_FRAGMENTATION ((1U << 16) - 2)
#define LRU_TIME_BITS 48
#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
--
2.45.2
next prev parent reply other threads:[~2025-02-13 18:46 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-13 18:45 [PATCH 00/18] last on disk format changes before freeze Kent Overstreet
2025-02-13 18:45 ` [PATCH 01/18] bcachefs: bch2_lru_change() checks for no-op Kent Overstreet
2025-02-13 18:45 ` [PATCH 02/18] bcachefs: s/BCH_LRU_FRAGMENTATION_START/BCH_LRU_BUCKET_FRAGMENTATION/ Kent Overstreet
2025-02-13 18:45 ` [PATCH 03/18] bcachefs: decouple bch2_lru_check_set() from alloc btree Kent Overstreet
2025-02-13 18:45 ` [PATCH 04/18] bcachefs: Rework bch2_check_lru_key() Kent Overstreet
2025-02-13 18:45 ` [PATCH 05/18] bcachefs: bch2_trigger_stripe_ptr() no longer uses ec_stripes_heap_lock Kent Overstreet
2025-02-13 18:45 ` [PATCH 06/18] bcachefs: Better trigger ordering Kent Overstreet
2025-02-13 18:45 ` [PATCH 07/18] bcachefs: rework bch2_trans_commit_run_triggers() Kent Overstreet
2025-02-13 18:45 ` [PATCH 08/18] bcachefs: bcachefs_metadata_version_cached_backpointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 09/18] bcachefs: Invalidate cached data by backpointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 10/18] bcachefs: Advance bch_alloc.oldest_gen if no stale pointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 11/18] bcachefs: bcachefs_metadata_version_stripe_backpointers Kent Overstreet
2025-02-13 18:45 ` Kent Overstreet [this message]
2025-02-13 18:45 ` [PATCH 13/18] bcachefs: ec_stripe_delete() uses new stripe lru Kent Overstreet
2025-02-13 18:45 ` [PATCH 14/18] bcachefs: get_existing_stripe() " Kent Overstreet
2025-02-13 18:46 ` [PATCH 15/18] bcachefs: We no longer read stripes into memory at startup Kent Overstreet
2025-02-13 18:46 ` [PATCH 16/18] bcachefs: Kill dirent_occupied_size() Kent Overstreet
2025-02-17 1:49 ` Hongbo Li
2025-02-13 18:46 ` [PATCH 17/18] bcachefs: Split out dirent alloc and name initialization Kent Overstreet
2025-02-13 18:46 ` [PATCH 18/18] bcachefs: bcachefs_metadata_version_casefolding Kent Overstreet
2025-02-21 18:26 ` [PATCH] bcachefs: Use flexible arrays in dirent Gabriel de Perthuis
2025-02-22 14:07 ` Kent Overstreet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250213184607.18237-13-kent.overstreet@linux.dev \
--to=kent.overstreet@linux.dev \
--cc=linux-bcachefs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.