From: Mingzhe Zou <mingzhe.zou@easystack.cn>
To: colyli@suse.de, linux-bcache@vger.kernel.org
Cc: bcache@lists.ewheeler.net, zoumingzhe@qq.com
Subject: [PATCH v3 1/3] bcache: the gc_sectors_used size matches the bucket size
Date: Wed, 19 Jul 2023 15:27:51 +0800 [thread overview]
Message-ID: <20230719072753.366-1-mingzhe.zou@easystack.cn> (raw)
From: Mingzhe Zou <zoumingzhe@qq.com>
The bucket size in the superblock is defined as uint16_t.
However, GC_SECTORS_USED is only 13 bits wide. If the bucket
size is 4MB (8192 sectors), GC_SECTORS_USED will be truncated
to MAX_GC_SECTORS_USED.
GC_SECTORS_USED is the sorting condition for moving gc, so we
should try our best to ensure it is correct.
Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
---
drivers/md/bcache/alloc.c | 12 ++++++++----
drivers/md/bcache/bcache.h | 12 ++++++------
drivers/md/bcache/btree.c | 18 ++++++++++++------
drivers/md/bcache/movinggc.c | 14 +++++++-------
drivers/md/bcache/sysfs.c | 2 +-
include/trace/events/bcache.h | 2 +-
6 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index ce13c272c387..4ae1018bf029 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -142,7 +142,7 @@ void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
lockdep_assert_held(&ca->set->bucket_lock);
BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);
- if (GC_SECTORS_USED(b))
+ if (b->gc_sectors_used)
trace_bcache_invalidate(ca, b - ca->buckets);
bch_inc_gen(ca, b);
@@ -170,7 +170,7 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
({ \
unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
\
- (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
+ (b->prio - ca->set->min_prio + min_prio) * b->gc_sectors_used; \
})
#define bucket_max_cmp(l, r) (bucket_prio(l) < bucket_prio(r))
@@ -446,7 +446,11 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
BUG_ON(atomic_read(&b->pin) != 1);
- SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
+ /*
+ * If gc_sectors_used is 0, moving gc is preferred. But the new bucket
+ * should not do moving gc. So we set gc_sectors_used to the maximum.
+ */
+ b->gc_sectors_used = ca->sb.bucket_size;
if (reserve <= RESERVE_PRIO) {
SET_GC_MARK(b, GC_MARK_METADATA);
@@ -469,7 +473,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
SET_GC_MARK(b, 0);
- SET_GC_SECTORS_USED(b, 0);
+ b->gc_sectors_used = 0;
if (ca->set->avail_nbuckets < ca->set->nbuckets) {
ca->set->avail_nbuckets++;
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 5a79bb3c272f..582df3c9dc1b 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -197,9 +197,10 @@
struct bucket {
atomic_t pin;
uint16_t prio;
+ uint16_t flag;
uint8_t gen;
uint8_t last_gc; /* Most out of date gen in the btree */
- uint16_t gc_mark; /* Bitfield used by GC. See below for field */
+ uint16_t gc_sectors_used;
};
/*
@@ -207,14 +208,11 @@ struct bucket {
* as multiple threads touch struct bucket without locking
*/
-BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE 1
#define GC_MARK_DIRTY 2
#define GC_MARK_METADATA 3
-#define GC_SECTORS_USED_SIZE 13
-#define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE))
-BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
-BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
+BITMASK(GC_MARK, struct bucket, flag, 0, 2);
+BITMASK(GC_MOVE, struct bucket, flag, 2, 1);
#include "journal.h"
#include "stats.h"
@@ -764,6 +762,8 @@ struct bbio {
#define bucket_bytes(ca) ((ca)->sb.bucket_size << 9)
#define block_bytes(ca) ((ca)->sb.block_size << 9)
+#define MAX_BUCKET_SIZE (~(~0ULL << 16)) /* sectors */
+
static inline unsigned int meta_bucket_pages(struct cache_sb *sb)
{
unsigned int n, max_pages;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fd121a61f17c..ca962f329977 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1255,11 +1255,11 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
SET_GC_MARK(g, GC_MARK_RECLAIMABLE);
/* guard against overflow */
- SET_GC_SECTORS_USED(g, min_t(unsigned int,
- GC_SECTORS_USED(g) + KEY_SIZE(k),
- MAX_GC_SECTORS_USED));
+ g->gc_sectors_used = min_t(uint16_t, c->cache->sb.bucket_size,
+ g->gc_sectors_used + KEY_SIZE(k));
- BUG_ON(!GC_SECTORS_USED(g));
+ BUG_ON(g->gc_sectors_used < KEY_SIZE(k) ||
+ g->gc_sectors_used > c->cache->sb.bucket_size);
}
return stale;
@@ -1732,9 +1732,15 @@ static void btree_gc_start(struct cache_set *c)
ca = c->cache;
for_each_bucket(b, ca) {
b->last_gc = b->gen;
+
+ /*
+ * If the bucket is still in use, mark is not necessary.
+ * In bch_bucket_alloc(), we set the gc_sectors_used to
+ * cache bucket size, just keep the maximum.
+ */
if (!atomic_read(&b->pin)) {
SET_GC_MARK(b, 0);
- SET_GC_SECTORS_USED(b, 0);
+ b->gc_sectors_used = 0;
}
}
@@ -1797,7 +1803,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
if (atomic_read(&b->pin))
continue;
- BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b));
+ BUG_ON(!GC_MARK(b) && b->gc_sectors_used);
if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
c->avail_nbuckets++;
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 9f32901fdad1..e4182c3ba9f8 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -184,14 +184,14 @@ err: if (!IS_ERR_OR_NULL(w->private))
static bool bucket_cmp(struct bucket *l, struct bucket *r)
{
- return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
+ return l->gc_sectors_used < r->gc_sectors_used;
}
static unsigned int bucket_heap_top(struct cache *ca)
{
struct bucket *b;
- return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
+ return (b = heap_peek(&ca->heap)) ? b->gc_sectors_used : 0;
}
void bch_moving_gc(struct cache_set *c)
@@ -213,17 +213,17 @@ void bch_moving_gc(struct cache_set *c)
for_each_bucket(b, ca) {
if (GC_MARK(b) == GC_MARK_METADATA ||
- !GC_SECTORS_USED(b) ||
- GC_SECTORS_USED(b) == ca->sb.bucket_size ||
+ !b->gc_sectors_used ||
+ b->gc_sectors_used == ca->sb.bucket_size ||
atomic_read(&b->pin))
continue;
if (!heap_full(&ca->heap)) {
- sectors_to_move += GC_SECTORS_USED(b);
+ sectors_to_move += b->gc_sectors_used;
heap_add(&ca->heap, b, bucket_cmp);
} else if (bucket_cmp(b, heap_peek(&ca->heap))) {
sectors_to_move -= bucket_heap_top(ca);
- sectors_to_move += GC_SECTORS_USED(b);
+ sectors_to_move += b->gc_sectors_used;
ca->heap.data[0] = b;
heap_sift(&ca->heap, 0, bucket_cmp);
@@ -232,7 +232,7 @@ void bch_moving_gc(struct cache_set *c)
while (sectors_to_move > reserve_sectors) {
heap_pop(&ca->heap, b, bucket_cmp);
- sectors_to_move -= GC_SECTORS_USED(b);
+ sectors_to_move -= b->gc_sectors_used;
}
while (heap_pop(&ca->heap, b, bucket_cmp))
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 0e2c1880f60b..3b859954b8c5 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -1073,7 +1073,7 @@ SHOW(__bch_cache)
mutex_lock(&ca->set->bucket_lock);
for_each_bucket(b, ca) {
- if (!GC_SECTORS_USED(b))
+ if (!b->gc_sectors_used)
unused++;
if (GC_MARK(b) == GC_MARK_RECLAIMABLE)
available++;
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 899fdacf57b9..b9d63e18c453 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -438,7 +438,7 @@ TRACE_EVENT(bcache_invalidate,
TP_fast_assign(
__entry->dev = ca->bdev->bd_dev;
__entry->offset = bucket << ca->set->bucket_bits;
- __entry->sectors = GC_SECTORS_USED(&ca->buckets[bucket]);
+ __entry->sectors = ca->buckets[bucket].gc_sectors_used;
),
TP_printk("invalidated %u sectors at %d,%d sector=%llu",
--
2.17.1.windows.2
next reply other threads:[~2023-07-19 7:28 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-19 7:27 Mingzhe Zou [this message]
2023-07-19 7:27 ` [PATCH v3 2/3] bcache: Separate bch_moving_gc() from bch_btree_gc() Mingzhe Zou
2023-07-19 7:27 ` [PATCH v3 3/3] bcache: only copy dirty data during moving gc Mingzhe Zou
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230719072753.366-1-mingzhe.zou@easystack.cn \
--to=mingzhe.zou@easystack.cn \
--cc=bcache@lists.ewheeler.net \
--cc=colyli@suse.de \
--cc=linux-bcache@vger.kernel.org \
--cc=zoumingzhe@qq.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).