* [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
@ 2025-06-06 7:19 Robert Pang
2025-06-06 7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Robert Pang @ 2025-06-06 7:19 UTC (permalink / raw)
To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu
This patch series reverts bcache to its original top-down heap sifting strategy
for LRU cache replacement, which fixes a tail I/O latency regression.
Discussion: https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533
Robert Pang (3):
lib min_heap: refactor min_heap to allow the alternative sift-down
function to be used
lib min_heap: add alternative APIs that use the conventional top-down
strategy to sift down elements
bcache: Fix the tail IO latency regression due to the use of lib
min_heap
drivers/md/bcache/alloc.c | 14 ++--
include/linux/min_heap.h | 135 ++++++++++++++++++++++++++++++++------
lib/min_heap.c | 31 ++++++---
3 files changed, 145 insertions(+), 35 deletions(-)
--
2.50.0.rc1.591.g9c95f17f64-goog
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used 2025-06-06 7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang @ 2025-06-06 7:19 ` Robert Pang 2025-06-06 7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang 2025-06-06 7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang 2 siblings, 0 replies; 7+ messages in thread From: Robert Pang @ 2025-06-06 7:19 UTC (permalink / raw) To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu Refactor these min_heap's internal functions that require sifting down elements to take the sift-down function to use. This change will allow for the use of alternative sift-down strategies, potentially offering significant performance improvements for certain data distributions compared to the current bottom-up approach. - heapify_all - heap_pop - heap_pop_push - heap_del Signed-off-by: Robert Pang <robertpang@google.com> --- include/linux/min_heap.h | 60 ++++++++++++++++++++++++++-------------- lib/min_heap.c | 24 ++++++++++------ 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 1160bed6579e..1fe6772170e7 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -322,22 +322,27 @@ void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx /* Floyd's approach to heapification that is O(nr). 
*/ static __always_inline void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heap_sift_down_inline(heap, i, elem_size, func, args); + sift_down(heap, i, elem_size, func, args); } #define min_heapify_all_inline(_heap, _func, _args) \ __min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ - __minheap_obj_size(_heap), _func, _args) + __minheap_obj_size(_heap), _func, _args, \ + __min_heap_sift_down_inline) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)) { void *data = heap->data; @@ -347,14 +352,15 @@ bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heap_sift_down_inline(heap, 0, elem_size, func, args); + sift_down(heap, 0, elem_size, func, args); return true; } #define min_heap_pop_inline(_heap, _func, _args) \ __min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ - __minheap_obj_size(_heap), _func, _args) + __minheap_obj_size(_heap), _func, _args, \ + __min_heap_sift_down_inline) /* * Remove the minimum element and then push the given element. 
The @@ -363,15 +369,18 @@ bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, */ static __always_inline void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func, void *args) + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)) { memcpy(heap->data, element, elem_size); - __min_heap_sift_down_inline(heap, 0, elem_size, func, args); + sift_down(heap, 0, elem_size, func, args); } #define min_heap_pop_push_inline(_heap, _element, _func, _args) \ __min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ - __minheap_obj_size(_heap), _func, _args) + __minheap_obj_size(_heap), _func, _args, \ + __min_heap_sift_down_inline) /* Push an element on to the heap, O(log2(nr)). */ static __always_inline @@ -402,7 +411,9 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele /* Remove ith element from the heap, O(log2(nr)). 
*/ static __always_inline bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)) { void *data = heap->data; void (*swp)(void *lhs, void *rhs, void *args) = func->swp; @@ -419,14 +430,15 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, return true; do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args); __min_heap_sift_up_inline(heap, elem_size, idx, func, args); - __min_heap_sift_down_inline(heap, idx, elem_size, func, args); + sift_down(heap, idx, elem_size, func, args); return true; } #define min_heap_del_inline(_heap, _idx, _func, _args) \ __min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ - __minheap_obj_size(_heap), _idx, _func, _args) + __minheap_obj_size(_heap), _idx, _func, _args, \ + __min_heap_sift_down_inline)) void __min_heap_init(min_heap_char *heap, void *data, int size); void *__min_heap_peek(struct min_heap_char *heap); @@ -436,15 +448,23 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args); void __min_heapify_all(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args); + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)); bool __min_heap_pop(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args); + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)); void 
__min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func, void *args); + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)); bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, const struct min_heap_callbacks *func, void *args); bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args); + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)); #define min_heap_init(_heap, _data, _size) \ __min_heap_init(container_of(&(_heap)->nr, min_heap_char, nr), _data, _size) @@ -460,18 +480,18 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, __minheap_obj_size(_heap), _idx, _func, _args) #define min_heapify_all(_heap, _func, _args) \ __min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr), \ - __minheap_obj_size(_heap), _func, _args) + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down) #define min_heap_pop(_heap, _func, _args) \ __min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr), \ - __minheap_obj_size(_heap), _func, _args) + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down) #define min_heap_pop_push(_heap, _element, _func, _args) \ __min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ - __minheap_obj_size(_heap), _func, _args) + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down) #define min_heap_push(_heap, _element, _func, _args) \ __min_heap_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ __minheap_obj_size(_heap), _func, _args) #define min_heap_del(_heap, _idx, _func, _args) \ __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \ - 
__minheap_obj_size(_heap), _idx, _func, _args) + __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down) #endif /* _LINUX_MIN_HEAP_H */ diff --git a/lib/min_heap.c b/lib/min_heap.c index 4485372ff3b1..4ec425788783 100644 --- a/lib/min_heap.c +++ b/lib/min_heap.c @@ -35,23 +35,29 @@ void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, EXPORT_SYMBOL(__min_heap_sift_up); void __min_heapify_all(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)) { - __min_heapify_all_inline(heap, elem_size, func, args); + __min_heapify_all_inline(heap, elem_size, func, args, sift_down); } EXPORT_SYMBOL(__min_heapify_all); bool __min_heap_pop(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)) { - return __min_heap_pop_inline(heap, elem_size, func, args); + return __min_heap_pop_inline(heap, elem_size, func, args, sift_down); } EXPORT_SYMBOL(__min_heap_pop); void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func, void *args) + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)) { - __min_heap_pop_push_inline(heap, element, elem_size, func, args); + __min_heap_pop_push_inline(heap, element, elem_size, func, args, sift_down); } EXPORT_SYMBOL(__min_heap_pop_push); @@ -63,8 +69,10 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, EXPORT_SYMBOL(__min_heap_push); bool __min_heap_del(min_heap_char *heap, size_t 
elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) + const struct min_heap_callbacks *func, void *args, + void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args)) { - return __min_heap_del_inline(heap, elem_size, idx, func, args); + return __min_heap_del_inline(heap, elem_size, idx, func, args, sift_down); } EXPORT_SYMBOL(__min_heap_del); -- 2.50.0.rc1.591.g9c95f17f64-goog ^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements 2025-06-06 7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang 2025-06-06 7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang @ 2025-06-06 7:19 ` Robert Pang 2025-06-06 12:52 ` Kuan-Wei Chiu 2025-06-06 7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang 2 siblings, 1 reply; 7+ messages in thread From: Robert Pang @ 2025-06-06 7:19 UTC (permalink / raw) To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu Add these min_heap functions that re-introduce the conventional top-down strategy to sift down elements. This strategy offers significant performance improvements for data that are mostly identical. [1] - heapify_all_top_down - heap_pop_top_down - heap_pop_push_top_down - heap_del_top_down [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#m155a21be72ff0cc57d825affbcafc77ac5c2dd0d Signed-off-by: Robert Pang <robertpang@google.com> --- include/linux/min_heap.h | 75 ++++++++++++++++++++++++++++++++++++++++ lib/min_heap.c | 7 ++++ 2 files changed, 82 insertions(+) diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 1fe6772170e7..149069317bb3 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -494,4 +494,79 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \ __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down) +static __always_inline +void __min_heap_sift_down_top_down_inline(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + void *data = heap->data; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; + /* 
pre-scale counters for performance */ + size_t a = pos * elem_size; + size_t b, c, d, smallest; + size_t n = heap->nr * elem_size; + + if (!swp) + swp = select_swap_func(data, elem_size); + + for (;;) { + if (2 * a + elem_size >= n) + break; + + c = 2 * a + elem_size; + b = a; + smallest = b; + if (func->less(data + c, data + smallest, args)) + smallest = c; + + if (c + elem_size < n) { + d = c + elem_size; + if (func->less(data + d, data + smallest, args)) + smallest = d; + } + if (smallest == b) + break; + do_swap(data + smallest, data + b, elem_size, swp, args); + a = (smallest == c) ? c : d; + } +} + +#define min_heap_sift_down_top_down_inline(_heap, _pos, _func, _args) \ + __min_heap_sift_down_top_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heapify_all_top_down_inline(_heap, _func, _args) \ + __min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args, \ + __min_heap_sift_down_top_down_inline) +#define min_heap_pop_top_down_inline(_heap, _func, _args) \ + __min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args, \ + __min_heap_sift_down_top_down_inline) +#define min_heap_pop_push_top_down_inline(_heap, _element, _func, _args) \ + __min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args, \ + __min_heap_sift_down_top_down_inline) +#define min_heap_del_top_down_inline(_heap, _idx, _func, _args) \ + __min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args, \ + __min_heap_sift_down_top_down_inline)) + +void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args); + +#define min_heap_sift_down_top_down(_heap, _pos, _func, _args) \ + 
__min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ + __minheap_obj_size(_heap), _func, _args) +#define min_heapify_all_top_down(_heap, _func, _args) \ + __min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down) +#define min_heap_pop_top_down(_heap, _func, _args) \ + __min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down) +#define min_heap_pop_push_top_down(_heap, _element, _func, _args) \ + __min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down) +#define min_heap_del_top_down(_heap, _idx, _func, _args) \ + __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down_top_down) + #endif /* _LINUX_MIN_HEAP_H */ diff --git a/lib/min_heap.c b/lib/min_heap.c index 4ec425788783..a10d3a7cc525 100644 --- a/lib/min_heap.c +++ b/lib/min_heap.c @@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, } EXPORT_SYMBOL(__min_heap_sift_down); +void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_down_top_down_inline(heap, pos, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_down_top_down); + void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args) { -- 2.50.0.rc1.591.g9c95f17f64-goog ^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements 2025-06-06 7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang @ 2025-06-06 12:52 ` Kuan-Wei Chiu 0 siblings, 0 replies; 7+ messages in thread From: Kuan-Wei Chiu @ 2025-06-06 12:52 UTC (permalink / raw) To: Robert Pang; +Cc: Coly Li, Kent Overstreet, linux-bcache On Fri, Jun 06, 2025 at 12:19:44AM -0700, Robert Pang wrote: > Add these min_heap functions that re-introduce the conventional top-down > strategy to sift down elements. This strategy offers significant performance > improvements for data that are mostly identical. [1] > > - heapify_all_top_down > - heap_pop_top_down > - heap_pop_push_top_down > - heap_del_top_down > > [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#m155a21be72ff0cc57d825affbcafc77ac5c2dd0d Nit: I'd prefer using a Link: tag here. 
> > Signed-off-by: Robert Pang <robertpang@google.com> > --- > include/linux/min_heap.h | 75 ++++++++++++++++++++++++++++++++++++++++ > lib/min_heap.c | 7 ++++ > 2 files changed, 82 insertions(+) > > diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h > index 1fe6772170e7..149069317bb3 100644 > --- a/include/linux/min_heap.h > +++ b/include/linux/min_heap.h > @@ -494,4 +494,79 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, > __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \ > __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down) > > +static __always_inline > +void __min_heap_sift_down_top_down_inline(min_heap_char *heap, int pos, size_t elem_size, > + const struct min_heap_callbacks *func, void *args) > +{ > + void *data = heap->data; > + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; > + /* pre-scale counters for performance */ > + size_t a = pos * elem_size; > + size_t b, c, d, smallest; > + size_t n = heap->nr * elem_size; > + > + if (!swp) > + swp = select_swap_func(data, elem_size); > + > + for (;;) { > + if (2 * a + elem_size >= n) > + break; > + > + c = 2 * a + elem_size; > + b = a; > + smallest = b; > + if (func->less(data + c, data + smallest, args)) > + smallest = c; > + > + if (c + elem_size < n) { > + d = c + elem_size; > + if (func->less(data + d, data + smallest, args)) > + smallest = d; > + } > + if (smallest == b) > + break; > + do_swap(data + smallest, data + b, elem_size, swp, args); > + a = (smallest == c) ? c : d; > + } > +} The logic looks correct, but we actually only need variables a, b, and c. The use of d and the extra nested if seem unnecessary. 
I think the following version is shorter and easier to understand: for (;;) { b = 2 * a + elem_size; c = b + elem_size; smallest = a; if (b >= n) break; if (func->less(data + b, data + smallest, args)) smallest = b; if (c < n && func->less(data + c, data + smallest, args)) smallest = c; if (smallest == a) break; do_swap(data + a, data + smallest, elem_size, swp, args); a = smallest; } > + > +#define min_heap_sift_down_top_down_inline(_heap, _pos, _func, _args) \ > + __min_heap_sift_down_top_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ > + _pos, __minheap_obj_size(_heap), _func, _args) > +#define min_heapify_all_top_down_inline(_heap, _func, _args) \ > + __min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ > + __minheap_obj_size(_heap), _func, _args, \ > + __min_heap_sift_down_top_down_inline) > +#define min_heap_pop_top_down_inline(_heap, _func, _args) \ > + __min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ > + __minheap_obj_size(_heap), _func, _args, \ > + __min_heap_sift_down_top_down_inline) > +#define min_heap_pop_push_top_down_inline(_heap, _element, _func, _args) \ > + __min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ > + __minheap_obj_size(_heap), _func, _args, \ > + __min_heap_sift_down_top_down_inline) > +#define min_heap_del_top_down_inline(_heap, _idx, _func, _args) \ > + __min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ > + __minheap_obj_size(_heap), _idx, _func, _args, \ > + __min_heap_sift_down_top_down_inline)) > + > +void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size, > + const struct min_heap_callbacks *func, void *args); > + > +#define min_heap_sift_down_top_down(_heap, _pos, _func, _args) \ > + __min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ > + __minheap_obj_size(_heap), _func, _args) > +#define min_heapify_all_top_down(_heap, _func, _args) \ > + 
__min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr), \ > + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down) > +#define min_heap_pop_top_down(_heap, _func, _args) \ > + __min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr), \ > + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down) > +#define min_heap_pop_push_top_down(_heap, _element, _func, _args) \ > + __min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ > + __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down) > +#define min_heap_del_top_down(_heap, _idx, _func, _args) \ > + __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \ > + __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down_top_down) > + I think we should document in Documentation/core-api/min_heap.rst why the *_top_down variants exist and how to choose between them. Otherwise, it could be confusing for future users. Regards, Kuan-Wei > #endif /* _LINUX_MIN_HEAP_H */ > diff --git a/lib/min_heap.c b/lib/min_heap.c > index 4ec425788783..a10d3a7cc525 100644 > --- a/lib/min_heap.c > +++ b/lib/min_heap.c > @@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, > } > EXPORT_SYMBOL(__min_heap_sift_down); > > +void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size, > + const struct min_heap_callbacks *func, void *args) > +{ > + __min_heap_sift_down_top_down_inline(heap, pos, elem_size, func, args); > +} > +EXPORT_SYMBOL(__min_heap_sift_down_top_down); > + > void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, > const struct min_heap_callbacks *func, void *args) > { > -- > 2.50.0.rc1.591.g9c95f17f64-goog > ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap 2025-06-06 7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang 2025-06-06 7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang 2025-06-06 7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang @ 2025-06-06 7:19 ` Robert Pang 2025-06-06 13:01 ` Kuan-Wei Chiu 2 siblings, 1 reply; 7+ messages in thread From: Robert Pang @ 2025-06-06 7:19 UTC (permalink / raw) To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu In commit "lib/min_heap: introduce non-inline versions of min heap API functions" (92a8b22), bcache migrates to the generic lib min_heap for all heap operations. This causes sizeable the tail IO latency regression during the cache replacement. This commit updates invalidate_buckets_lru() to use the alternative APIs that sift down elements using the top-down approach like bcache's own original heap implementation. 
[1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533 Signed-off-by: Robert Pang <robertpang@google.com> --- drivers/md/bcache/alloc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 8998e61efa40..547d1cd0c7c2 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca) if (!bch_can_invalidate_bucket(ca, b)) continue; - if (!min_heap_full(&ca->heap)) - min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); - else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { + if (!min_heap_full_inline(&ca->heap)) + min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca); + else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) { ca->heap.data[0] = b; - min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); + min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca); } } - min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); + min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca); while (!fifo_full(&ca->free_inc)) { if (!ca->heap.nr) { @@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca) wake_up_gc(ca->set); return; } - b = min_heap_peek(&ca->heap)[0]; - min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); + b = min_heap_peek_inline(&ca->heap)[0]; + min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca); bch_invalidate_one_bucket(ca, b); } -- 2.50.0.rc1.591.g9c95f17f64-goog ^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap 2025-06-06 7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang @ 2025-06-06 13:01 ` Kuan-Wei Chiu 2025-06-10 12:44 ` Robert Pang 0 siblings, 1 reply; 7+ messages in thread From: Kuan-Wei Chiu @ 2025-06-06 13:01 UTC (permalink / raw) To: Robert Pang; +Cc: Coly Li, Kent Overstreet, linux-bcache On Fri, Jun 06, 2025 at 12:19:45AM -0700, Robert Pang wrote: > In commit "lib/min_heap: introduce non-inline versions of min heap API functions" > (92a8b22), bcache migrates to the generic lib min_heap for all heap operations. > This causes sizeable the tail IO latency regression during the cache replacement. Nit: According to the documentation, I'd prefer referencing the commit like this: 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap API functions") https://docs.kernel.org/process/submitting-patches.html#describe-your-changes Also, if the regression is caused by the heapify method, shouldn't the commit that introduced it be 866898efbb25 ("bcache: remove heap-related macros and switch to generic min_heap") ? > > This commit updates invalidate_buckets_lru() to use the alternative APIs that > sift down elements using the top-down approach like bcache's own original heap > implementation. 
> > [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533 > > Signed-off-by: Robert Pang <robertpang@google.com> > --- > drivers/md/bcache/alloc.c | 14 +++++++------- > 1 file changed, 7 insertions(+), 7 deletions(-) > > diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c > index 8998e61efa40..547d1cd0c7c2 100644 > --- a/drivers/md/bcache/alloc.c > +++ b/drivers/md/bcache/alloc.c > @@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca) > if (!bch_can_invalidate_bucket(ca, b)) > continue; > > - if (!min_heap_full(&ca->heap)) > - min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); > - else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { > + if (!min_heap_full_inline(&ca->heap)) > + min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca); If the regression is caused by the heapify method rather than the inline vs non-inline change, is it necessary to switch to the non-inline version here? 
Regards, Kuan-Wei > + else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) { > ca->heap.data[0] = b; > - min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); > + min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca); > } > } > > - min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); > + min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca); > > while (!fifo_full(&ca->free_inc)) { > if (!ca->heap.nr) { > @@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca) > wake_up_gc(ca->set); > return; > } > - b = min_heap_peek(&ca->heap)[0]; > - min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); > + b = min_heap_peek_inline(&ca->heap)[0]; > + min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca); > > bch_invalidate_one_bucket(ca, b); > } > -- > 2.50.0.rc1.591.g9c95f17f64-goog > ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap 2025-06-06 13:01 ` Kuan-Wei Chiu @ 2025-06-10 12:44 ` Robert Pang 0 siblings, 0 replies; 7+ messages in thread From: Robert Pang @ 2025-06-10 12:44 UTC (permalink / raw) To: Kuan-Wei Chiu; +Cc: Coly Li, Kent Overstreet, linux-bcache When I tested this patch series initially, merely switching to the traditional top-down sift-down alone did not resolve the latency regression fully. It requires both the top-down sift-down plus inlining together to match the original latency numbers before the migration to lib/min_heap API. As I understand, the invalidate_buckets_lru() is performance-critical and requires both optimizations. Best regards Robert On Fri, Jun 6, 2025 at 10:01 PM Kuan-Wei Chiu <visitorckw@gmail.com> wrote: > > On Fri, Jun 06, 2025 at 12:19:45AM -0700, Robert Pang wrote: > > In commit "lib/min_heap: introduce non-inline versions of min heap API functions" > > (92a8b22), bcache migrates to the generic lib min_heap for all heap operations. > > This causes sizeable the tail IO latency regression during the cache replacement. > > Nit: According to the documentation, I'd prefer referencing the commit > like this: > > 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap > API functions") > https://docs.kernel.org/process/submitting-patches.html#describe-your-changes > > Also, if the regression is caused by the heapify method, shouldn't the > commit that introduced it be 866898efbb25 ("bcache: remove heap-related > macros and switch to generic min_heap") ? > > > > > This commit updates invalidate_buckets_lru() to use the alternative APIs that > > sift down elements using the top-down approach like bcache's own original heap > > implementation. 
> > > > [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533 > > > > Signed-off-by: Robert Pang <robertpang@google.com> > > --- > > drivers/md/bcache/alloc.c | 14 +++++++------- > > 1 file changed, 7 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c > > index 8998e61efa40..547d1cd0c7c2 100644 > > --- a/drivers/md/bcache/alloc.c > > +++ b/drivers/md/bcache/alloc.c > > @@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca) > > if (!bch_can_invalidate_bucket(ca, b)) > > continue; > > > > - if (!min_heap_full(&ca->heap)) > > - min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca); > > - else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) { > > + if (!min_heap_full_inline(&ca->heap)) > > + min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca); > > If the regression is caused by the heapify method rather than the > inline vs non-inline change, is it necessary to switch to the > non-inline version here? 
> > Regards, > Kuan-Wei > > > + else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) { > > ca->heap.data[0] = b; > > - min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca); > > + min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca); > > } > > } > > > > - min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca); > > + min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca); > > > > while (!fifo_full(&ca->free_inc)) { > > if (!ca->heap.nr) { > > @@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca) > > wake_up_gc(ca->set); > > return; > > } > > - b = min_heap_peek(&ca->heap)[0]; > > - min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca); > > + b = min_heap_peek_inline(&ca->heap)[0]; > > + min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca); > > > > bch_invalidate_one_bucket(ca, b); > > } > > -- > > 2.50.0.rc1.591.g9c95f17f64-goog > > ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-06-10 12:44 UTC | newest] Thread overview: 7+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2025-06-06 7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang 2025-06-06 7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang 2025-06-06 7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang 2025-06-06 12:52 ` Kuan-Wei Chiu 2025-06-06 7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang 2025-06-06 13:01 ` Kuan-Wei Chiu 2025-06-10 12:44 ` Robert Pang
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox