public inbox for linux-bcachefs@vger.kernel.org
 help / color / mirror / Atom feed
From: Kent Overstreet <kent.overstreet@linux.dev>
To: linux-bcachefs@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Subject: [PATCH 12/18] bcachefs: bcachefs_metadata_version_stripe_lru
Date: Thu, 13 Feb 2025 13:45:57 -0500	[thread overview]
Message-ID: <20250213184607.18237-13-kent.overstreet@linux.dev> (raw)
In-Reply-To: <20250213184607.18237-1-kent.overstreet@linux.dev>

Add a persistent LRU for stripes, ordered by "number of empty blocks",
i.e. the order in which we wish to reuse them.

This will replace the in-memory stripes heap, so we can kill off reading
stripes into memory at startup.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/alloc_background.c |  3 +-
 fs/bcachefs/bcachefs_format.h  |  3 +-
 fs/bcachefs/ec.c               | 51 ++++++++++++++++++++++++++++++++++
 fs/bcachefs/ec.h               | 27 ++++++++++++++++++
 fs/bcachefs/lru.c              |  7 +++++
 fs/bcachefs/lru.h              |  9 ++++--
 fs/bcachefs/lru_format.h       |  4 ++-
 7 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index c5c8497a6339..ecad4a78c3f7 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1757,7 +1757,8 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
 				POS_MIN, BTREE_ITER_prefetch, k,
 				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
+			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
+		bch2_check_stripe_to_lru_refs(c);
 
 	bch2_bkey_buf_exit(&last_flushed, c);
 	bch_err_fn(c, ret);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index bf3723a2bca4..b4ac311f21a1 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -688,7 +688,8 @@ struct bch_sb_field_ext {
 	x(autofix_errors,		BCH_VERSION(1, 19))		\
 	x(directory_size,		BCH_VERSION(1, 20))		\
 	x(cached_backpointers,		BCH_VERSION(1, 21))		\
-	x(stripe_backpointers,		BCH_VERSION(1, 22))
+	x(stripe_backpointers,		BCH_VERSION(1, 22))		\
+	x(stripe_lru,			BCH_VERSION(1, 23))
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 36590c0ce09f..1090cdb7d5cc 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -20,6 +20,7 @@
 #include "io_read.h"
 #include "io_write.h"
 #include "keylist.h"
+#include "lru.h"
 #include "recovery.h"
 #include "replicas.h"
 #include "super-io.h"
@@ -411,6 +412,15 @@ int bch2_trigger_stripe(struct btree_trans *trans,
 	       (new_s->nr_blocks	!= old_s->nr_blocks ||
 		new_s->nr_redundant	!= old_s->nr_redundant));
 
+	if (flags & BTREE_TRIGGER_transactional) {
+		int ret = bch2_lru_change(trans,
+					  BCH_LRU_STRIPE_FRAGMENTATION,
+					  idx,
+					  stripe_lru_pos(old_s),
+					  stripe_lru_pos(new_s));
+		if (ret)
+			return ret;
+	}
 
 	if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
 		/*
@@ -1175,6 +1185,10 @@ static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
 	return ret;
 }
 
+/*
+ * XXX
+ * can we kill this and delete stripes from the trigger?
+ */
 static void ec_stripe_delete_work(struct work_struct *work)
 {
 	struct bch_fs *c =
@@ -2519,3 +2533,40 @@ int bch2_fs_ec_init(struct bch_fs *c)
 	return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
 			   BIOSET_NEED_BVECS);
 }
+
+/* Run bch2_lru_check_set() on stripe key @k when stripe_lru_pos() is nonzero. */
+static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
+					struct bkey_s_c k,
+					struct bkey_buf *last_flushed)
+{
+	if (k.k->type != KEY_TYPE_stripe)
+		return 0;
+
+	u64 lru_time = stripe_lru_pos(bkey_s_c_to_stripe(k).v);
+
+	/* zero position: skip the bch2_lru_check_set() call */
+	if (!lru_time)
+		return 0;
+
+	return bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION,
+				  k.k->p.offset, lru_time,
+				  k, last_flushed);
+}
+
+/* Iterate BTREE_ID_stripes from POS_MIN, calling bch2_check_stripe_to_lru_ref(). */
+int bch2_check_stripe_to_lru_refs(struct bch_fs *c)
+{
+	struct bkey_buf flushed;
+	bch2_bkey_buf_init(&flushed);
+	bkey_init(&flushed.k->k);
+
+	int ret = bch2_trans_run(c,
+		for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
+				POS_MIN, BTREE_ITER_prefetch, k,
+				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+			bch2_check_stripe_to_lru_ref(trans, k, &flushed)));
+
+	bch2_bkey_buf_exit(&flushed, c);
+	bch_err_fn(c, ret);
+	return ret;
+}
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 4c9511887655..cd1c837e4933 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -92,6 +92,31 @@ static inline void stripe_csum_set(struct bch_stripe *s,
 	memcpy(stripe_csum(s, block, csum_idx), &csum, bch_crc_bytes[s->csum_type]);
 }
 
+#define STRIPE_LRU_POS_EMPTY	1
+
+/*
+ * LRU position for stripe @s: 0 if @s is NULL or no data block is empty.
+ * Only data blocks are counted. NOTE(review): assumes parity blocks never
+ * carry a blockcount - confirm against bch2_ptr_matches_stripe().
+ */
+static inline u64 stripe_lru_pos(const struct bch_stripe *s)
+{
+	if (!s)
+		return 0;
+
+	unsigned nr_data = s->nr_blocks - s->nr_redundant, blocks_empty = 0;
+	for (unsigned i = 0; i < nr_data; i++)
+		blocks_empty += !stripe_blockcount_get(s, i);
+
+	/* Will be picked up by the stripe_delete worker */
+	if (blocks_empty == nr_data)
+		return STRIPE_LRU_POS_EMPTY;
+	if (!blocks_empty)
+		return 0;
+	/* invert: more blocks empty = reuse first */
+	return LRU_TIME_MAX - blocks_empty;
+}
+
 static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe_ptr,
 					     const struct bch_extent_ptr *data_ptr,
 					     unsigned sectors)
@@ -282,4 +307,6 @@ void bch2_fs_ec_exit(struct bch_fs *);
 void bch2_fs_ec_init_early(struct bch_fs *);
 int bch2_fs_ec_init(struct bch_fs *);
 
+int bch2_check_stripe_to_lru_refs(struct bch_fs *);
+
 #endif /* _BCACHEFS_EC_H */
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 98ab8496f29d..a299d9ec8ee4 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -6,6 +6,7 @@
 #include "btree_iter.h"
 #include "btree_update.h"
 #include "btree_write_buffer.h"
+#include "ec.h"
 #include "error.h"
 #include "lru.h"
 #include "recovery.h"
@@ -124,6 +125,8 @@ static struct bbpos lru_pos_to_bp(struct bkey_s_c lru_k)
 	case BCH_LRU_read:
 	case BCH_LRU_fragmentation:
 		return BBPOS(BTREE_ID_alloc, u64_to_bucket(lru_k.k->p.offset));
+	case BCH_LRU_stripes:
+		return BBPOS(BTREE_ID_stripes, POS(0, lru_k.k->p.offset));
 	default:
 		BUG();
 	}
@@ -151,6 +154,10 @@ static u64 bkey_lru_type_idx(struct bch_fs *c,
 		rcu_read_unlock();
 		return idx;
 	}
+	case BCH_LRU_stripes:
+		return k.k->type == KEY_TYPE_stripe
+			? stripe_lru_pos(bkey_s_c_to_stripe(k).v)
+			: 0;
 	default:
 		BUG();
 	}
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index dea1d75cc9c1..8abd0aa2083a 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -28,9 +28,14 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l)
 {
 	u16 lru_id = l.k->p.inode >> 48;
 
-	if (lru_id == BCH_LRU_BUCKET_FRAGMENTATION)
+	switch (lru_id) {
+	case BCH_LRU_BUCKET_FRAGMENTATION:
 		return BCH_LRU_fragmentation;
-	return BCH_LRU_read;
+	case BCH_LRU_STRIPE_FRAGMENTATION:
+		return BCH_LRU_stripes;
+	default:
+		return BCH_LRU_read;
+	}
 }
 
 int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, struct bkey_validate_context);
diff --git a/fs/bcachefs/lru_format.h b/fs/bcachefs/lru_format.h
index 353a352d3fb9..b7392ad8e41f 100644
--- a/fs/bcachefs/lru_format.h
+++ b/fs/bcachefs/lru_format.h
@@ -9,7 +9,8 @@ struct bch_lru {
 
 #define BCH_LRU_TYPES()		\
 	x(read)			\
-	x(fragmentation)
+	x(fragmentation)	\
+	x(stripes)
 
 enum bch_lru_type {
 #define x(n) BCH_LRU_##n,
@@ -18,6 +19,7 @@ enum bch_lru_type {
 };
 
 #define BCH_LRU_BUCKET_FRAGMENTATION	((1U << 16) - 1)
+#define BCH_LRU_STRIPE_FRAGMENTATION	((1U << 16) - 2)
 
 #define LRU_TIME_BITS			48
 #define LRU_TIME_MAX			((1ULL << LRU_TIME_BITS) - 1)
-- 
2.45.2


  parent reply	other threads:[~2025-02-13 18:46 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-13 18:45 [PATCH 00/18] last on disk format changes before freeze Kent Overstreet
2025-02-13 18:45 ` [PATCH 01/18] bcachefs: bch2_lru_change() checks for no-op Kent Overstreet
2025-02-13 18:45 ` [PATCH 02/18] bcachefs: s/BCH_LRU_FRAGMENTATION_START/BCH_LRU_BUCKET_FRAGMENTATION/ Kent Overstreet
2025-02-13 18:45 ` [PATCH 03/18] bcachefs: decouple bch2_lru_check_set() from alloc btree Kent Overstreet
2025-02-13 18:45 ` [PATCH 04/18] bcachefs: Rework bch2_check_lru_key() Kent Overstreet
2025-02-13 18:45 ` [PATCH 05/18] bcachefs: bch2_trigger_stripe_ptr() no longer uses ec_stripes_heap_lock Kent Overstreet
2025-02-13 18:45 ` [PATCH 06/18] bcachefs: Better trigger ordering Kent Overstreet
2025-02-13 18:45 ` [PATCH 07/18] bcachefs: rework bch2_trans_commit_run_triggers() Kent Overstreet
2025-02-13 18:45 ` [PATCH 08/18] bcachefs: bcachefs_metadata_version_cached_backpointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 09/18] bcachefs: Invalidate cached data by backpointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 10/18] bcachefs: Advance bch_alloc.oldest_gen if no stale pointers Kent Overstreet
2025-02-13 18:45 ` [PATCH 11/18] bcachefs: bcachefs_metadata_version_stripe_backpointers Kent Overstreet
2025-02-13 18:45 ` Kent Overstreet [this message]
2025-02-13 18:45 ` [PATCH 13/18] bcachefs: ec_stripe_delete() uses new stripe lru Kent Overstreet
2025-02-13 18:45 ` [PATCH 14/18] bcachefs: get_existing_stripe() " Kent Overstreet
2025-02-13 18:46 ` [PATCH 15/18] bcachefs: We no longer read stripes into memory at startup Kent Overstreet
2025-02-13 18:46 ` [PATCH 16/18] bcachefs: Kill dirent_occupied_size() Kent Overstreet
2025-02-17  1:49   ` Hongbo Li
2025-02-13 18:46 ` [PATCH 17/18] bcachefs: Split out dirent alloc and name initialization Kent Overstreet
2025-02-13 18:46 ` [PATCH 18/18] bcachefs: bcachefs_metadata_version_casefolding Kent Overstreet
2025-02-21 18:26   ` [PATCH] bcachefs: Use flexible arrays in dirent Gabriel de Perthuis
2025-02-22 14:07     ` Kent Overstreet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250213184607.18237-13-kent.overstreet@linux.dev \
    --to=kent.overstreet@linux.dev \
    --cc=linux-bcachefs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox