* [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
@ 2025-06-06 7:19 Robert Pang
2025-06-06 7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Robert Pang @ 2025-06-06 7:19 UTC (permalink / raw)
To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu
This patch series reverts bcache to its original top-down heap sifting strategy
for LRU cache replacement, which fixes a tail I/O latency regression.
Discussion: https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533
Robert Pang (3):
lib min_heap: refactor min_heap to allow the alternative sift-down
function to be used
lib min_heap: add alternative APIs that use the conventional top-down
strategy to sift down elements
bcache: Fix the tail IO latency regression due to the use of lib
min_heap
drivers/md/bcache/alloc.c | 14 ++--
include/linux/min_heap.h | 135 ++++++++++++++++++++++++++++++++------
lib/min_heap.c | 31 ++++++---
3 files changed, 145 insertions(+), 35 deletions(-)
--
2.50.0.rc1.591.g9c95f17f64-goog
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used
2025-06-06 7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
@ 2025-06-06 7:19 ` Robert Pang
2025-06-06 7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang
2025-06-06 7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
2 siblings, 0 replies; 7+ messages in thread
From: Robert Pang @ 2025-06-06 7:19 UTC (permalink / raw)
To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu
Refactor these min_heap's internal functions that require sifting down elements
to take the sift-down function to use. This change will allow for the use of
alternative sift-down strategies, potentially offering significant performance
improvements for certain data distributions compared to the current bottom-up
approach.
- heapify_all
- heap_pop
- heap_pop_push
- heap_del
Signed-off-by: Robert Pang <robertpang@google.com>
---
include/linux/min_heap.h | 60 ++++++++++++++++++++++++++--------------
lib/min_heap.c | 24 ++++++++++------
2 files changed, 56 insertions(+), 28 deletions(-)
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 1160bed6579e..1fe6772170e7 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -322,22 +322,27 @@ void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx
/* Floyd's approach to heapification that is O(nr). */
static __always_inline
void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size,
- const struct min_heap_callbacks *func, void *args)
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args))
{
int i;
for (i = heap->nr / 2 - 1; i >= 0; i--)
- __min_heap_sift_down_inline(heap, i, elem_size, func, args);
+ sift_down(heap, i, elem_size, func, args);
}
#define min_heapify_all_inline(_heap, _func, _args) \
__min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
- __minheap_obj_size(_heap), _func, _args)
+ __minheap_obj_size(_heap), _func, _args, \
+ __min_heap_sift_down_inline)
/* Remove minimum element from the heap, O(log2(nr)). */
static __always_inline
bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
- const struct min_heap_callbacks *func, void *args)
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args))
{
void *data = heap->data;
@@ -347,14 +352,15 @@ bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
/* Place last element at the root (position 0) and then sift down. */
heap->nr--;
memcpy(data, data + (heap->nr * elem_size), elem_size);
- __min_heap_sift_down_inline(heap, 0, elem_size, func, args);
+ sift_down(heap, 0, elem_size, func, args);
return true;
}
#define min_heap_pop_inline(_heap, _func, _args) \
__min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
- __minheap_obj_size(_heap), _func, _args)
+ __minheap_obj_size(_heap), _func, _args, \
+ __min_heap_sift_down_inline)
/*
* Remove the minimum element and then push the given element. The
@@ -363,15 +369,18 @@ bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
*/
static __always_inline
void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
- const struct min_heap_callbacks *func, void *args)
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args))
{
memcpy(heap->data, element, elem_size);
- __min_heap_sift_down_inline(heap, 0, elem_size, func, args);
+ sift_down(heap, 0, elem_size, func, args);
}
#define min_heap_pop_push_inline(_heap, _element, _func, _args) \
__min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \
- __minheap_obj_size(_heap), _func, _args)
+ __minheap_obj_size(_heap), _func, _args, \
+ __min_heap_sift_down_inline)
/* Push an element on to the heap, O(log2(nr)). */
static __always_inline
@@ -402,7 +411,9 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele
/* Remove ith element from the heap, O(log2(nr)). */
static __always_inline
bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
- const struct min_heap_callbacks *func, void *args)
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args))
{
void *data = heap->data;
void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
@@ -419,14 +430,15 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
return true;
do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args);
__min_heap_sift_up_inline(heap, elem_size, idx, func, args);
- __min_heap_sift_down_inline(heap, idx, elem_size, func, args);
+ sift_down(heap, idx, elem_size, func, args);
return true;
}
#define min_heap_del_inline(_heap, _idx, _func, _args) \
__min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
- __minheap_obj_size(_heap), _idx, _func, _args)
+ __minheap_obj_size(_heap), _idx, _func, _args, \
+ __min_heap_sift_down_inline)
void __min_heap_init(min_heap_char *heap, void *data, int size);
void *__min_heap_peek(struct min_heap_char *heap);
@@ -436,15 +448,23 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
const struct min_heap_callbacks *func, void *args);
void __min_heapify_all(min_heap_char *heap, size_t elem_size,
- const struct min_heap_callbacks *func, void *args);
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args));
bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
- const struct min_heap_callbacks *func, void *args);
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args));
void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size,
- const struct min_heap_callbacks *func, void *args);
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args));
bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
const struct min_heap_callbacks *func, void *args);
bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
- const struct min_heap_callbacks *func, void *args);
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args));
#define min_heap_init(_heap, _data, _size) \
__min_heap_init(container_of(&(_heap)->nr, min_heap_char, nr), _data, _size)
@@ -460,18 +480,18 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
__minheap_obj_size(_heap), _idx, _func, _args)
#define min_heapify_all(_heap, _func, _args) \
__min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr), \
- __minheap_obj_size(_heap), _func, _args)
+ __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down)
#define min_heap_pop(_heap, _func, _args) \
__min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr), \
- __minheap_obj_size(_heap), _func, _args)
+ __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down)
#define min_heap_pop_push(_heap, _element, _func, _args) \
__min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \
- __minheap_obj_size(_heap), _func, _args)
+ __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down)
#define min_heap_push(_heap, _element, _func, _args) \
__min_heap_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \
__minheap_obj_size(_heap), _func, _args)
#define min_heap_del(_heap, _idx, _func, _args) \
__min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \
- __minheap_obj_size(_heap), _idx, _func, _args)
+ __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down)
#endif /* _LINUX_MIN_HEAP_H */
diff --git a/lib/min_heap.c b/lib/min_heap.c
index 4485372ff3b1..4ec425788783 100644
--- a/lib/min_heap.c
+++ b/lib/min_heap.c
@@ -35,23 +35,29 @@ void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
EXPORT_SYMBOL(__min_heap_sift_up);
void __min_heapify_all(min_heap_char *heap, size_t elem_size,
- const struct min_heap_callbacks *func, void *args)
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args))
{
- __min_heapify_all_inline(heap, elem_size, func, args);
+ __min_heapify_all_inline(heap, elem_size, func, args, sift_down);
}
EXPORT_SYMBOL(__min_heapify_all);
bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
- const struct min_heap_callbacks *func, void *args)
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args))
{
- return __min_heap_pop_inline(heap, elem_size, func, args);
+ return __min_heap_pop_inline(heap, elem_size, func, args, sift_down);
}
EXPORT_SYMBOL(__min_heap_pop);
void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size,
- const struct min_heap_callbacks *func, void *args)
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args))
{
- __min_heap_pop_push_inline(heap, element, elem_size, func, args);
+ __min_heap_pop_push_inline(heap, element, elem_size, func, args, sift_down);
}
EXPORT_SYMBOL(__min_heap_pop_push);
@@ -63,8 +69,10 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
EXPORT_SYMBOL(__min_heap_push);
bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
- const struct min_heap_callbacks *func, void *args)
+ const struct min_heap_callbacks *func, void *args,
+ void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args))
{
- return __min_heap_del_inline(heap, elem_size, idx, func, args);
+ return __min_heap_del_inline(heap, elem_size, idx, func, args, sift_down);
}
EXPORT_SYMBOL(__min_heap_del);
--
2.50.0.rc1.591.g9c95f17f64-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements
2025-06-06 7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
2025-06-06 7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
@ 2025-06-06 7:19 ` Robert Pang
2025-06-06 12:52 ` Kuan-Wei Chiu
2025-06-06 7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
2 siblings, 1 reply; 7+ messages in thread
From: Robert Pang @ 2025-06-06 7:19 UTC (permalink / raw)
To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu
Add these min_heap functions that re-introduce the conventional top-down
strategy to sift down elements. This strategy offers significant performance
improvements for data that are mostly identical. [1]
- heapify_all_top_down
- heap_pop_top_down
- heap_pop_push_top_down
- heap_del_top_down
[1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#m155a21be72ff0cc57d825affbcafc77ac5c2dd0d
Signed-off-by: Robert Pang <robertpang@google.com>
---
include/linux/min_heap.h | 75 ++++++++++++++++++++++++++++++++++++++++
lib/min_heap.c | 7 ++++
2 files changed, 82 insertions(+)
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 1fe6772170e7..149069317bb3 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -494,4 +494,79 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
__min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \
__minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down)
+static __always_inline
+void __min_heap_sift_down_top_down_inline(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args)
+{
+ void *data = heap->data;
+ void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
+ /* pre-scale counters for performance */
+ size_t a = pos * elem_size;
+ size_t b, c, d, smallest;
+ size_t n = heap->nr * elem_size;
+
+ if (!swp)
+ swp = select_swap_func(data, elem_size);
+
+ for (;;) {
+ if (2 * a + elem_size >= n)
+ break;
+
+ c = 2 * a + elem_size;
+ b = a;
+ smallest = b;
+ if (func->less(data + c, data + smallest, args))
+ smallest = c;
+
+ if (c + elem_size < n) {
+ d = c + elem_size;
+ if (func->less(data + d, data + smallest, args))
+ smallest = d;
+ }
+ if (smallest == b)
+ break;
+ do_swap(data + smallest, data + b, elem_size, swp, args);
+ a = (smallest == c) ? c : d;
+ }
+}
+
+#define min_heap_sift_down_top_down_inline(_heap, _pos, _func, _args) \
+ __min_heap_sift_down_top_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
+ _pos, __minheap_obj_size(_heap), _func, _args)
+#define min_heapify_all_top_down_inline(_heap, _func, _args) \
+ __min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
+ __minheap_obj_size(_heap), _func, _args, \
+ __min_heap_sift_down_top_down_inline)
+#define min_heap_pop_top_down_inline(_heap, _func, _args) \
+ __min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
+ __minheap_obj_size(_heap), _func, _args, \
+ __min_heap_sift_down_top_down_inline)
+#define min_heap_pop_push_top_down_inline(_heap, _element, _func, _args) \
+ __min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \
+ __minheap_obj_size(_heap), _func, _args, \
+ __min_heap_sift_down_top_down_inline)
+#define min_heap_del_top_down_inline(_heap, _idx, _func, _args) \
+ __min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
+ __minheap_obj_size(_heap), _idx, _func, _args, \
+ __min_heap_sift_down_top_down_inline)
+
+void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args);
+
+#define min_heap_sift_down_top_down(_heap, _pos, _func, _args) \
+ __min_heap_sift_down_top_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \
+ __minheap_obj_size(_heap), _func, _args)
+#define min_heapify_all_top_down(_heap, _func, _args) \
+ __min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr), \
+ __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
+#define min_heap_pop_top_down(_heap, _func, _args) \
+ __min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr), \
+ __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
+#define min_heap_pop_push_top_down(_heap, _element, _func, _args) \
+ __min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \
+ __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
+#define min_heap_del_top_down(_heap, _idx, _func, _args) \
+ __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \
+ __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down_top_down)
+
#endif /* _LINUX_MIN_HEAP_H */
diff --git a/lib/min_heap.c b/lib/min_heap.c
index 4ec425788783..a10d3a7cc525 100644
--- a/lib/min_heap.c
+++ b/lib/min_heap.c
@@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
}
EXPORT_SYMBOL(__min_heap_sift_down);
+void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size,
+ const struct min_heap_callbacks *func, void *args)
+{
+ __min_heap_sift_down_top_down_inline(heap, pos, elem_size, func, args);
+}
+EXPORT_SYMBOL(__min_heap_sift_down_top_down);
+
void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
const struct min_heap_callbacks *func, void *args)
{
--
2.50.0.rc1.591.g9c95f17f64-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
2025-06-06 7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
2025-06-06 7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
2025-06-06 7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang
@ 2025-06-06 7:19 ` Robert Pang
2025-06-06 13:01 ` Kuan-Wei Chiu
2 siblings, 1 reply; 7+ messages in thread
From: Robert Pang @ 2025-06-06 7:19 UTC (permalink / raw)
To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu
In commit "lib/min_heap: introduce non-inline versions of min heap API functions"
(92a8b22), bcache migrates to the generic lib min_heap for all heap operations.
This causes a sizeable tail IO latency regression during cache replacement.
This commit updates invalidate_buckets_lru() to use the alternative APIs that
sift down elements using the top-down approach like bcache's own original heap
implementation.
[1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533
Signed-off-by: Robert Pang <robertpang@google.com>
---
drivers/md/bcache/alloc.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8998e61efa40..547d1cd0c7c2 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca)
if (!bch_can_invalidate_bucket(ca, b))
continue;
- if (!min_heap_full(&ca->heap))
- min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
- else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
+ if (!min_heap_full_inline(&ca->heap))
+ min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca);
+ else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) {
ca->heap.data[0] = b;
- min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
+ min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca);
}
}
- min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
+ min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
while (!fifo_full(&ca->free_inc)) {
if (!ca->heap.nr) {
@@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca)
wake_up_gc(ca->set);
return;
}
- b = min_heap_peek(&ca->heap)[0];
- min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
+ b = min_heap_peek_inline(&ca->heap)[0];
+ min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
bch_invalidate_one_bucket(ca, b);
}
--
2.50.0.rc1.591.g9c95f17f64-goog
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements
2025-06-06 7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang
@ 2025-06-06 12:52 ` Kuan-Wei Chiu
0 siblings, 0 replies; 7+ messages in thread
From: Kuan-Wei Chiu @ 2025-06-06 12:52 UTC (permalink / raw)
To: Robert Pang; +Cc: Coly Li, Kent Overstreet, linux-bcache
On Fri, Jun 06, 2025 at 12:19:44AM -0700, Robert Pang wrote:
> Add these min_heap functions that re-introduce the conventional top-down
> strategy to sift down elements. This strategy offers significant performance
> improvements for data that are mostly identical. [1]
>
> - heapify_all_top_down
> - heap_pop_top_down
> - heap_pop_push_top_down
> - heap_del_top_down
>
> [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#m155a21be72ff0cc57d825affbcafc77ac5c2dd0d
Nit: I'd prefer using a Link: tag here.
>
> Signed-off-by: Robert Pang <robertpang@google.com>
> ---
> include/linux/min_heap.h | 75 ++++++++++++++++++++++++++++++++++++++++
> lib/min_heap.c | 7 ++++
> 2 files changed, 82 insertions(+)
>
> diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
> index 1fe6772170e7..149069317bb3 100644
> --- a/include/linux/min_heap.h
> +++ b/include/linux/min_heap.h
> @@ -494,4 +494,79 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
> __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \
> __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down)
>
> +static __always_inline
> +void __min_heap_sift_down_top_down_inline(min_heap_char *heap, int pos, size_t elem_size,
> + const struct min_heap_callbacks *func, void *args)
> +{
> + void *data = heap->data;
> + void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
> + /* pre-scale counters for performance */
> + size_t a = pos * elem_size;
> + size_t b, c, d, smallest;
> + size_t n = heap->nr * elem_size;
> +
> + if (!swp)
> + swp = select_swap_func(data, elem_size);
> +
> + for (;;) {
> + if (2 * a + elem_size >= n)
> + break;
> +
> + c = 2 * a + elem_size;
> + b = a;
> + smallest = b;
> + if (func->less(data + c, data + smallest, args))
> + smallest = c;
> +
> + if (c + elem_size < n) {
> + d = c + elem_size;
> + if (func->less(data + d, data + smallest, args))
> + smallest = d;
> + }
> + if (smallest == b)
> + break;
> + do_swap(data + smallest, data + b, elem_size, swp, args);
> + a = (smallest == c) ? c : d;
> + }
> +}
The logic looks correct, but we actually only need variables a, b, and
c. The use of d and the extra nested if seem unnecessary. I think the
following version is shorter and easier to understand:
for (;;) {
b = 2 * a + elem_size;
c = b + elem_size;
smallest = a;
if (b >= n)
break;
if (func->less(data + b, data + smallest, args))
smallest = b;
if (c < n && func->less(data + c, data + smallest, args))
smallest = c;
if (smallest == a)
break;
do_swap(data + a, data + smallest, elem_size, swp, args);
a = smallest;
}
> +
> +#define min_heap_sift_down_top_down_inline(_heap, _pos, _func, _args) \
> + __min_heap_sift_down_top_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
> + _pos, __minheap_obj_size(_heap), _func, _args)
> +#define min_heapify_all_top_down_inline(_heap, _func, _args) \
> + __min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
> + __minheap_obj_size(_heap), _func, _args, \
> + __min_heap_sift_down_top_down_inline)
> +#define min_heap_pop_top_down_inline(_heap, _func, _args) \
> + __min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
> + __minheap_obj_size(_heap), _func, _args, \
> + __min_heap_sift_down_top_down_inline)
> +#define min_heap_pop_push_top_down_inline(_heap, _element, _func, _args) \
> + __min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \
> + __minheap_obj_size(_heap), _func, _args, \
> + __min_heap_sift_down_top_down_inline)
> +#define min_heap_del_top_down_inline(_heap, _idx, _func, _args) \
> + __min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \
> + __minheap_obj_size(_heap), _idx, _func, _args, \
> + __min_heap_sift_down_top_down_inline))
> +
> +void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size,
> + const struct min_heap_callbacks *func, void *args);
> +
> +#define min_heap_sift_down_top_down(_heap, _pos, _func, _args) \
> + __min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \
> + __minheap_obj_size(_heap), _func, _args)
> +#define min_heapify_all_top_down(_heap, _func, _args) \
> + __min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr), \
> + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
> +#define min_heap_pop_top_down(_heap, _func, _args) \
> + __min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr), \
> + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
> +#define min_heap_pop_push_top_down(_heap, _element, _func, _args) \
> + __min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \
> + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
> +#define min_heap_del_top_down(_heap, _idx, _func, _args) \
> + __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \
> + __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down_top_down)
> +
I think we should document in Documentation/core-api/min_heap.rst why
the *_top_down variants exist and how to choose between them.
Otherwise, it could be confusing for future users.
Regards,
Kuan-Wei
> #endif /* _LINUX_MIN_HEAP_H */
> diff --git a/lib/min_heap.c b/lib/min_heap.c
> index 4ec425788783..a10d3a7cc525 100644
> --- a/lib/min_heap.c
> +++ b/lib/min_heap.c
> @@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
> }
> EXPORT_SYMBOL(__min_heap_sift_down);
>
> +void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size,
> + const struct min_heap_callbacks *func, void *args)
> +{
> + __min_heap_sift_down_top_down_inline(heap, pos, elem_size, func, args);
> +}
> +EXPORT_SYMBOL(__min_heap_sift_down_top_down);
> +
> void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
> const struct min_heap_callbacks *func, void *args)
> {
> --
> 2.50.0.rc1.591.g9c95f17f64-goog
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
2025-06-06 7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
@ 2025-06-06 13:01 ` Kuan-Wei Chiu
2025-06-10 12:44 ` Robert Pang
0 siblings, 1 reply; 7+ messages in thread
From: Kuan-Wei Chiu @ 2025-06-06 13:01 UTC (permalink / raw)
To: Robert Pang; +Cc: Coly Li, Kent Overstreet, linux-bcache
On Fri, Jun 06, 2025 at 12:19:45AM -0700, Robert Pang wrote:
> In commit "lib/min_heap: introduce non-inline versions of min heap API functions"
> (92a8b22), bcache migrates to the generic lib min_heap for all heap operations.
> This causes sizeable the tail IO latency regression during the cache replacement.
Nit: According to the documentation, I'd prefer referencing the commit
like this:
92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap
API functions")
https://docs.kernel.org/process/submitting-patches.html#describe-your-changes
Also, if the regression is caused by the heapify method, shouldn't the
commit that introduced it be 866898efbb25 ("bcache: remove heap-related
macros and switch to generic min_heap") ?
>
> This commit updates invalidate_buckets_lru() to use the alternative APIs that
> sift down elements using the top-down approach like bcache's own original heap
> implementation.
>
> [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533
>
> Signed-off-by: Robert Pang <robertpang@google.com>
> ---
> drivers/md/bcache/alloc.c | 14 +++++++-------
> 1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
> index 8998e61efa40..547d1cd0c7c2 100644
> --- a/drivers/md/bcache/alloc.c
> +++ b/drivers/md/bcache/alloc.c
> @@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca)
> if (!bch_can_invalidate_bucket(ca, b))
> continue;
>
> - if (!min_heap_full(&ca->heap))
> - min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
> - else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
> + if (!min_heap_full_inline(&ca->heap))
> + min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca);
If the regression is caused by the heapify method rather than the
inline vs non-inline change, is it necessary to switch to the
non-inline version here?
Regards,
Kuan-Wei
> + else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) {
> ca->heap.data[0] = b;
> - min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
> + min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca);
> }
> }
>
> - min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
> + min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
>
> while (!fifo_full(&ca->free_inc)) {
> if (!ca->heap.nr) {
> @@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca)
> wake_up_gc(ca->set);
> return;
> }
> - b = min_heap_peek(&ca->heap)[0];
> - min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
> + b = min_heap_peek_inline(&ca->heap)[0];
> + min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
>
> bch_invalidate_one_bucket(ca, b);
> }
> --
> 2.50.0.rc1.591.g9c95f17f64-goog
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
2025-06-06 13:01 ` Kuan-Wei Chiu
@ 2025-06-10 12:44 ` Robert Pang
0 siblings, 0 replies; 7+ messages in thread
From: Robert Pang @ 2025-06-10 12:44 UTC (permalink / raw)
To: Kuan-Wei Chiu; +Cc: Coly Li, Kent Overstreet, linux-bcache
When I tested this patch series initially, merely switching to the
traditional top-down sift-down alone did not resolve the latency
regression fully. It requires both the top-down sift-down plus
inlining together to match the original latency numbers before the
migration to lib/min_heap API. As I understand, the
invalidate_buckets_lru() is performance-critical and requires both
optimizations.
Best regards
Robert
On Fri, Jun 6, 2025 at 10:01 PM Kuan-Wei Chiu <visitorckw@gmail.com> wrote:
>
> On Fri, Jun 06, 2025 at 12:19:45AM -0700, Robert Pang wrote:
> > In commit "lib/min_heap: introduce non-inline versions of min heap API functions"
> > (92a8b22), bcache migrates to the generic lib min_heap for all heap operations.
> > This causes sizeable the tail IO latency regression during the cache replacement.
>
> Nit: According to the documentation, I'd prefer referencing the commit
> like this:
>
> 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap
> API functions")
> https://docs.kernel.org/process/submitting-patches.html#describe-your-changes
>
> Also, if the regression is caused by the heapify method, shouldn't the
> commit that introduced it be 866898efbb25 ("bcache: remove heap-related
> macros and switch to generic min_heap") ?
>
> >
> > This commit updates invalidate_buckets_lru() to use the alternative APIs that
> > sift down elements using the top-down approach like bcache's own original heap
> > implementation.
> >
> > [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533
> >
> > Signed-off-by: Robert Pang <robertpang@google.com>
> > ---
> > drivers/md/bcache/alloc.c | 14 +++++++-------
> > 1 file changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
> > index 8998e61efa40..547d1cd0c7c2 100644
> > --- a/drivers/md/bcache/alloc.c
> > +++ b/drivers/md/bcache/alloc.c
> > @@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca)
> > if (!bch_can_invalidate_bucket(ca, b))
> > continue;
> >
> > - if (!min_heap_full(&ca->heap))
> > - min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
> > - else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
> > + if (!min_heap_full_inline(&ca->heap))
> > + min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca);
>
> If the regression is caused by the heapify method rather than the
> inline vs non-inline change, is it necessary to switch to the
> non-inline version here?
>
> Regards,
> Kuan-Wei
>
> > + else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) {
> > ca->heap.data[0] = b;
> > - min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
> > + min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca);
> > }
> > }
> >
> > - min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
> > + min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
> >
> > while (!fifo_full(&ca->free_inc)) {
> > if (!ca->heap.nr) {
> > @@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca)
> > wake_up_gc(ca->set);
> > return;
> > }
> > - b = min_heap_peek(&ca->heap)[0];
> > - min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
> > + b = min_heap_peek_inline(&ca->heap)[0];
> > + min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
> >
> > bch_invalidate_one_bucket(ca, b);
> > }
> > --
> > 2.50.0.rc1.591.g9c95f17f64-goog
> >
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-06-10 12:44 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-06 7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
2025-06-06 7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
2025-06-06 7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang
2025-06-06 12:52 ` Kuan-Wei Chiu
2025-06-06 7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
2025-06-06 13:01 ` Kuan-Wei Chiu
2025-06-10 12:44 ` Robert Pang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox