public inbox for linux-bcache@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
@ 2025-06-06  7:19 Robert Pang
  2025-06-06  7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Robert Pang @ 2025-06-06  7:19 UTC (permalink / raw)
  To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu

This patch series reverts bcache to its original top-down heap sifting strategy
for LRU cache replacement, which fixes a tail I/O latency regression.

Discussion: https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533

Robert Pang (3):
  lib min_heap: refactor min_heap to allow the alternative sift-down
    function to be used
  lib min_heap: add alternative APIs that use the conventional top-down
    strategy to sift down elements
  bcache: Fix the tail IO latency regression due to the use of lib
    min_heap

 drivers/md/bcache/alloc.c |  14 ++--
 include/linux/min_heap.h  | 135 ++++++++++++++++++++++++++++++++------
 lib/min_heap.c            |  31 ++++++---
 3 files changed, 145 insertions(+), 35 deletions(-)

-- 
2.50.0.rc1.591.g9c95f17f64-goog


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used
  2025-06-06  7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
@ 2025-06-06  7:19 ` Robert Pang
  2025-06-06  7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang
  2025-06-06  7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
  2 siblings, 0 replies; 7+ messages in thread
From: Robert Pang @ 2025-06-06  7:19 UTC (permalink / raw)
  To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu

Refactor min_heap's internal functions that sift down elements to take the
sift-down function as a parameter. This change will allow for the use of
alternative sift-down strategies, potentially offering significant performance
improvements for certain data distributions compared to the current bottom-up
approach.

- heapify_all
- heap_pop
- heap_pop_push
- heap_del

Signed-off-by: Robert Pang <robertpang@google.com>
---
 include/linux/min_heap.h | 60 ++++++++++++++++++++++++++--------------
 lib/min_heap.c           | 24 ++++++++++------
 2 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 1160bed6579e..1fe6772170e7 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -322,22 +322,27 @@ void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx
 /* Floyd's approach to heapification that is O(nr). */
 static __always_inline
 void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size,
-			      const struct min_heap_callbacks *func, void *args)
+			      const struct min_heap_callbacks *func, void *args,
+			      void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                                const struct min_heap_callbacks *func, void *args))
 {
 	int i;
 
 	for (i = heap->nr / 2 - 1; i >= 0; i--)
-		__min_heap_sift_down_inline(heap, i, elem_size, func, args);
+		sift_down(heap, i, elem_size, func, args);
 }
 
 #define min_heapify_all_inline(_heap, _func, _args)	\
 	__min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
-				 __minheap_obj_size(_heap), _func, _args)
+				 __minheap_obj_size(_heap), _func, _args,	\
+				 __min_heap_sift_down_inline)
 
 /* Remove minimum element from the heap, O(log2(nr)). */
 static __always_inline
 bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
-			   const struct min_heap_callbacks *func, void *args)
+			   const struct min_heap_callbacks *func, void *args,
+			   void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                             const struct min_heap_callbacks *func, void *args))
 {
 	void *data = heap->data;
 
@@ -347,14 +352,15 @@ bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
 	/* Place last element at the root (position 0) and then sift down. */
 	heap->nr--;
 	memcpy(data, data + (heap->nr * elem_size), elem_size);
-	__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
+	sift_down(heap, 0, elem_size, func, args);
 
 	return true;
 }
 
 #define min_heap_pop_inline(_heap, _func, _args)	\
 	__min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
-			      __minheap_obj_size(_heap), _func, _args)
+			      __minheap_obj_size(_heap), _func, _args,	\
+			      __min_heap_sift_down_inline)
 
 /*
  * Remove the minimum element and then push the given element. The
@@ -363,15 +369,18 @@ bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size,
  */
 static __always_inline
 void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size,
-				const struct min_heap_callbacks *func, void *args)
+				const struct min_heap_callbacks *func, void *args,
+				void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                                  const struct min_heap_callbacks *func, void *args))
 {
 	memcpy(heap->data, element, elem_size);
-	__min_heap_sift_down_inline(heap, 0, elem_size, func, args);
+	sift_down(heap, 0, elem_size, func, args);
 }
 
 #define min_heap_pop_push_inline(_heap, _element, _func, _args)	\
 	__min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element,	\
-				   __minheap_obj_size(_heap), _func, _args)
+				   __minheap_obj_size(_heap), _func, _args,	\
+				   __min_heap_sift_down_inline)
 
 /* Push an element on to the heap, O(log2(nr)). */
 static __always_inline
@@ -402,7 +411,9 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele
 /* Remove ith element from the heap, O(log2(nr)). */
 static __always_inline
 bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
-			   const struct min_heap_callbacks *func, void *args)
+			   const struct min_heap_callbacks *func, void *args,
+			   void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                             const struct min_heap_callbacks *func, void *args))
 {
 	void *data = heap->data;
 	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
@@ -419,14 +430,15 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx,
 		return true;
 	do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args);
 	__min_heap_sift_up_inline(heap, elem_size, idx, func, args);
-	__min_heap_sift_down_inline(heap, idx, elem_size, func, args);
+	sift_down(heap, idx, elem_size, func, args);
 
 	return true;
 }
 
 #define min_heap_del_inline(_heap, _idx, _func, _args)	\
 	__min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
-			      __minheap_obj_size(_heap), _idx, _func, _args)
+			      __minheap_obj_size(_heap), _idx, _func, _args,	\
+			      __min_heap_sift_down_inline)
 
 void __min_heap_init(min_heap_char *heap, void *data, int size);
 void *__min_heap_peek(struct min_heap_char *heap);
@@ -436,15 +448,23 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
 void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
 			const struct min_heap_callbacks *func, void *args);
 void __min_heapify_all(min_heap_char *heap, size_t elem_size,
-		       const struct min_heap_callbacks *func, void *args);
+		       const struct min_heap_callbacks *func, void *args,
+                       void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                         const struct min_heap_callbacks *func, void *args));
 bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
-		    const struct min_heap_callbacks *func, void *args);
+		    const struct min_heap_callbacks *func, void *args,
+                    void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                      const struct min_heap_callbacks *func, void *args));
 void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size,
-			 const struct min_heap_callbacks *func, void *args);
+			 const struct min_heap_callbacks *func, void *args,
+                         void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                           const struct min_heap_callbacks *func, void *args));
 bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 		     const struct min_heap_callbacks *func, void *args);
 bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
-		    const struct min_heap_callbacks *func, void *args);
+		    const struct min_heap_callbacks *func, void *args,
+                    void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                      const struct min_heap_callbacks *func, void *args));
 
 #define min_heap_init(_heap, _data, _size)	\
 	__min_heap_init(container_of(&(_heap)->nr, min_heap_char, nr), _data, _size)
@@ -460,18 +480,18 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
 			   __minheap_obj_size(_heap), _idx, _func, _args)
 #define min_heapify_all(_heap, _func, _args)	\
 	__min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr),	\
-			  __minheap_obj_size(_heap), _func, _args)
+			  __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down)
 #define min_heap_pop(_heap, _func, _args)	\
 	__min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr),	\
-		       __minheap_obj_size(_heap), _func, _args)
+		       __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down)
 #define min_heap_pop_push(_heap, _element, _func, _args)	\
 	__min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element,	\
-			    __minheap_obj_size(_heap), _func, _args)
+			    __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down)
 #define min_heap_push(_heap, _element, _func, _args)	\
 	__min_heap_push(container_of(&(_heap)->nr, min_heap_char, nr), _element,	\
 			__minheap_obj_size(_heap), _func, _args)
 #define min_heap_del(_heap, _idx, _func, _args)	\
 	__min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr),	\
-		       __minheap_obj_size(_heap), _idx, _func, _args)
+		       __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down)
 
 #endif /* _LINUX_MIN_HEAP_H */
diff --git a/lib/min_heap.c b/lib/min_heap.c
index 4485372ff3b1..4ec425788783 100644
--- a/lib/min_heap.c
+++ b/lib/min_heap.c
@@ -35,23 +35,29 @@ void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
 EXPORT_SYMBOL(__min_heap_sift_up);
 
 void __min_heapify_all(min_heap_char *heap, size_t elem_size,
-		       const struct min_heap_callbacks *func, void *args)
+		       const struct min_heap_callbacks *func, void *args,
+		       void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                         const struct min_heap_callbacks *func, void *args))
 {
-	__min_heapify_all_inline(heap, elem_size, func, args);
+	__min_heapify_all_inline(heap, elem_size, func, args, sift_down);
 }
 EXPORT_SYMBOL(__min_heapify_all);
 
 bool __min_heap_pop(min_heap_char *heap, size_t elem_size,
-		    const struct min_heap_callbacks *func, void *args)
+		    const struct min_heap_callbacks *func, void *args,
+		    void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                      const struct min_heap_callbacks *func, void *args))
 {
-	return __min_heap_pop_inline(heap, elem_size, func, args);
+	return __min_heap_pop_inline(heap, elem_size, func, args, sift_down);
 }
 EXPORT_SYMBOL(__min_heap_pop);
 
 void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size,
-			 const struct min_heap_callbacks *func, void *args)
+			 const struct min_heap_callbacks *func, void *args,
+			 void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                           const struct min_heap_callbacks *func, void *args))
 {
-	__min_heap_pop_push_inline(heap, element, elem_size, func, args);
+	__min_heap_pop_push_inline(heap, element, elem_size, func, args, sift_down);
 }
 EXPORT_SYMBOL(__min_heap_pop_push);
 
@@ -63,8 +69,10 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size,
 EXPORT_SYMBOL(__min_heap_push);
 
 bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
-		    const struct min_heap_callbacks *func, void *args)
+		    const struct min_heap_callbacks *func, void *args,
+		    void (*sift_down)(min_heap_char *heap, int pos, size_t elem_size,
+                                      const struct min_heap_callbacks *func, void *args))
 {
-	return __min_heap_del_inline(heap, elem_size, idx, func, args);
+	return __min_heap_del_inline(heap, elem_size, idx, func, args, sift_down);
 }
 EXPORT_SYMBOL(__min_heap_del);
-- 
2.50.0.rc1.591.g9c95f17f64-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements
  2025-06-06  7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
  2025-06-06  7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
@ 2025-06-06  7:19 ` Robert Pang
  2025-06-06 12:52   ` Kuan-Wei Chiu
  2025-06-06  7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
  2 siblings, 1 reply; 7+ messages in thread
From: Robert Pang @ 2025-06-06  7:19 UTC (permalink / raw)
  To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu

Add these min_heap functions that re-introduce the conventional top-down
strategy to sift down elements. This strategy offers significant performance
improvements for data that are mostly identical. [1]

- heapify_all_top_down
- heap_pop_top_down
- heap_pop_push_top_down
- heap_del_top_down

[1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#m155a21be72ff0cc57d825affbcafc77ac5c2dd0d

Signed-off-by: Robert Pang <robertpang@google.com>
---
 include/linux/min_heap.h | 75 ++++++++++++++++++++++++++++++++++++++++
 lib/min_heap.c           |  7 ++++
 2 files changed, 82 insertions(+)

diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
index 1fe6772170e7..149069317bb3 100644
--- a/include/linux/min_heap.h
+++ b/include/linux/min_heap.h
@@ -494,4 +494,79 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
 	__min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr),	\
 		       __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down)
 
+static __always_inline
+void __min_heap_sift_down_top_down_inline(min_heap_char *heap, int pos, size_t elem_size,
+					  const struct min_heap_callbacks *func, void *args)
+{
+	void *data = heap->data;
+	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
+	/* pre-scale counters for performance */
+	size_t a = pos * elem_size;
+	size_t b, c, d, smallest;
+	size_t n = heap->nr * elem_size;
+
+	if (!swp)
+		swp = select_swap_func(data, elem_size);
+
+	for (;;) {
+		if (2 * a + elem_size >= n)
+			break;
+
+		c = 2 * a + elem_size;
+		b = a;
+		smallest = b;
+		if (func->less(data + c, data + smallest, args))
+			smallest = c;
+
+		if (c + elem_size < n) {
+			d = c + elem_size;
+			if (func->less(data + d, data + smallest, args))
+				smallest = d;
+		}
+		if (smallest == b)
+			break;
+		do_swap(data + smallest, data + b, elem_size, swp, args);
+		a = (smallest == c) ? c : d;
+	}
+}
+
+#define min_heap_sift_down_top_down_inline(_heap, _pos, _func, _args)	\
+	__min_heap_sift_down_top_down_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
+					     _pos, __minheap_obj_size(_heap), _func, _args)
+#define min_heapify_all_top_down_inline(_heap, _func, _args)	\
+	__min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
+				 __minheap_obj_size(_heap), _func, _args,	\
+				 __min_heap_sift_down_top_down_inline)
+#define min_heap_pop_top_down_inline(_heap, _func, _args)	\
+	__min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
+			      __minheap_obj_size(_heap), _func, _args,	\
+			      __min_heap_sift_down_top_down_inline)
+#define min_heap_pop_push_top_down_inline(_heap, _element, _func, _args)	\
+	__min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element,	\
+				   __minheap_obj_size(_heap), _func, _args,	\
+				   __min_heap_sift_down_top_down_inline)
+#define min_heap_del_top_down_inline(_heap, _idx, _func, _args)	\
+	__min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
+			      __minheap_obj_size(_heap), _idx, _func, _args,	\
+			      __min_heap_sift_down_top_down_inline)
+
+void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size,
+                                   const struct min_heap_callbacks *func, void *args);
+
+#define min_heap_sift_down_top_down(_heap, _pos, _func, _args)	\
+	__min_heap_sift_down_top_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos,	\
+			     __minheap_obj_size(_heap), _func, _args)
+#define min_heapify_all_top_down(_heap, _func, _args)	\
+	__min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr),	\
+			  __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
+#define min_heap_pop_top_down(_heap, _func, _args)	\
+	__min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr),	\
+		       __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
+#define min_heap_pop_push_top_down(_heap, _element, _func, _args)	\
+	__min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element,	\
+			    __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
+#define min_heap_del_top_down(_heap, _idx, _func, _args)	\
+	__min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr),	\
+		       __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down_top_down)
+
 #endif /* _LINUX_MIN_HEAP_H */
diff --git a/lib/min_heap.c b/lib/min_heap.c
index 4ec425788783..a10d3a7cc525 100644
--- a/lib/min_heap.c
+++ b/lib/min_heap.c
@@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
 }
 EXPORT_SYMBOL(__min_heap_sift_down);
 
+void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size,
+				   const struct min_heap_callbacks *func, void *args)
+{
+	__min_heap_sift_down_top_down_inline(heap, pos, elem_size, func, args);
+}
+EXPORT_SYMBOL(__min_heap_sift_down_top_down);
+
 void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
 			const struct min_heap_callbacks *func, void *args)
 {
-- 
2.50.0.rc1.591.g9c95f17f64-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
  2025-06-06  7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
  2025-06-06  7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
  2025-06-06  7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang
@ 2025-06-06  7:19 ` Robert Pang
  2025-06-06 13:01   ` Kuan-Wei Chiu
  2 siblings, 1 reply; 7+ messages in thread
From: Robert Pang @ 2025-06-06  7:19 UTC (permalink / raw)
  To: Coly Li, Kent Overstreet, linux-bcache; +Cc: Robert Pang, Kuan-Wei Chiu

In commit "lib/min_heap: introduce non-inline versions of min heap API functions"
(92a8b22), bcache migrates to the generic lib min_heap for all heap operations.
This causes a sizeable tail IO latency regression during cache replacement.

This commit updates invalidate_buckets_lru() to use the alternative APIs that
sift down elements using the top-down approach like bcache's own original heap
implementation.

[1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533

Signed-off-by: Robert Pang <robertpang@google.com>
---
 drivers/md/bcache/alloc.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8998e61efa40..547d1cd0c7c2 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca)
 		if (!bch_can_invalidate_bucket(ca, b))
 			continue;
 
-		if (!min_heap_full(&ca->heap))
-			min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
-		else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
+		if (!min_heap_full_inline(&ca->heap))
+			min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca);
+		else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) {
 			ca->heap.data[0] = b;
-			min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
+			min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca);
 		}
 	}
 
-	min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
+	min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
 
 	while (!fifo_full(&ca->free_inc)) {
 		if (!ca->heap.nr) {
@@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca)
 			wake_up_gc(ca->set);
 			return;
 		}
-		b = min_heap_peek(&ca->heap)[0];
-		min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
+		b = min_heap_peek_inline(&ca->heap)[0];
+		min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
 
 		bch_invalidate_one_bucket(ca, b);
 	}
-- 
2.50.0.rc1.591.g9c95f17f64-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements
  2025-06-06  7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang
@ 2025-06-06 12:52   ` Kuan-Wei Chiu
  0 siblings, 0 replies; 7+ messages in thread
From: Kuan-Wei Chiu @ 2025-06-06 12:52 UTC (permalink / raw)
  To: Robert Pang; +Cc: Coly Li, Kent Overstreet, linux-bcache

On Fri, Jun 06, 2025 at 12:19:44AM -0700, Robert Pang wrote:
> Add these min_heap functions that re-introduce the conventional top-down
> strategy to sift down elements. This strategy offers significant performance
> improvements for data that are mostly identical. [1]
> 
> - heapify_all_top_down
> - heap_pop_top_down
> - heap_pop_push_top_down
> - heap_del_top_down
> 
> [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#m155a21be72ff0cc57d825affbcafc77ac5c2dd0d

Nit: I'd prefer using a Link: tag here.

> 
> Signed-off-by: Robert Pang <robertpang@google.com>
> ---
>  include/linux/min_heap.h | 75 ++++++++++++++++++++++++++++++++++++++++
>  lib/min_heap.c           |  7 ++++
>  2 files changed, 82 insertions(+)
> 
> diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
> index 1fe6772170e7..149069317bb3 100644
> --- a/include/linux/min_heap.h
> +++ b/include/linux/min_heap.h
> @@ -494,4 +494,79 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx,
>  	__min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr),	\
>  		       __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down)
>  
> +static __always_inline
> +void __min_heap_sift_down_top_down_inline(min_heap_char *heap, int pos, size_t elem_size,
> +					  const struct min_heap_callbacks *func, void *args)
> +{
> +	void *data = heap->data;
> +	void (*swp)(void *lhs, void *rhs, void *args) = func->swp;
> +	/* pre-scale counters for performance */
> +	size_t a = pos * elem_size;
> +	size_t b, c, d, smallest;
> +	size_t n = heap->nr * elem_size;
> +
> +	if (!swp)
> +		swp = select_swap_func(data, elem_size);
> +
> +	for (;;) {
> +		if (2 * a + elem_size >= n)
> +			break;
> +
> +		c = 2 * a + elem_size;
> +		b = a;
> +		smallest = b;
> +		if (func->less(data + c, data + smallest, args))
> +			smallest = c;
> +
> +		if (c + elem_size < n) {
> +			d = c + elem_size;
> +			if (func->less(data + d, data + smallest, args))
> +				smallest = d;
> +		}
> +		if (smallest == b)
> +			break;
> +		do_swap(data + smallest, data + b, elem_size, swp, args);
> +		a = (smallest == c) ? c : d;
> +	}
> +}

The logic looks correct, but we actually only need variables a, b, and
c. The use of d and the extra nested if seem unnecessary. I think the
following version is shorter and easier to understand:

for (;;) {
	b = 2 * a + elem_size;
	c = b + elem_size;
	smallest = a;

	if (b >= n)
		break;

	if (func->less(data + b, data + smallest, args))
		smallest = b;

	if (c < n && func->less(data + c, data + smallest, args))
		smallest = c;

	if (smallest == a)
		break;

	do_swap(data + a, data + smallest, elem_size, swp, args);
	a = smallest;
}

> +
> +#define min_heap_sift_down_top_down_inline(_heap, _pos, _func, _args)	\
> +	__min_heap_sift_down_top_down_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
> +					     _pos, __minheap_obj_size(_heap), _func, _args)
> +#define min_heapify_all_top_down_inline(_heap, _func, _args)	\
> +	__min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
> +				 __minheap_obj_size(_heap), _func, _args,	\
> +				 __min_heap_sift_down_top_down_inline)
> +#define min_heap_pop_top_down_inline(_heap, _func, _args)	\
> +	__min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
> +			      __minheap_obj_size(_heap), _func, _args,	\
> +			      __min_heap_sift_down_top_down_inline)
> +#define min_heap_pop_push_top_down_inline(_heap, _element, _func, _args)	\
> +	__min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element,	\
> +				   __minheap_obj_size(_heap), _func, _args,	\
> +				   __min_heap_sift_down_top_down_inline)
> +#define min_heap_del_top_down_inline(_heap, _idx, _func, _args)	\
> +	__min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr),	\
> +			      __minheap_obj_size(_heap), _idx, _func, _args,	\
> +			      __min_heap_sift_down_top_down_inline))
> +
> +void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size,
> +                                   const struct min_heap_callbacks *func, void *args);
> +
> +#define min_heap_sift_down_top_down(_heap, _pos, _func, _args)	\
> +	__min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos,	\
> +			     __minheap_obj_size(_heap), _func, _args)
> +#define min_heapify_all_top_down(_heap, _func, _args)	\
> +	__min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr),	\
> +			  __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
> +#define min_heap_pop_top_down(_heap, _func, _args)	\
> +	__min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr),	\
> +		       __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
> +#define min_heap_pop_push_top_down(_heap, _element, _func, _args)	\
> +	__min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element,	\
> +			    __minheap_obj_size(_heap), _func, _args, __min_heap_sift_down_top_down)
> +#define min_heap_del_top_down(_heap, _idx, _func, _args)	\
> +	__min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr),	\
> +		       __minheap_obj_size(_heap), _idx, _func, _args, __min_heap_sift_down_top_down)
> +

I think we should document in Documentation/core-api/min_heap.rst why
the *_top_down variants exist and how to choose between them.
Otherwise, it could be confusing for future users.

Regards,
Kuan-Wei

>  #endif /* _LINUX_MIN_HEAP_H */
> diff --git a/lib/min_heap.c b/lib/min_heap.c
> index 4ec425788783..a10d3a7cc525 100644
> --- a/lib/min_heap.c
> +++ b/lib/min_heap.c
> @@ -27,6 +27,13 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size,
>  }
>  EXPORT_SYMBOL(__min_heap_sift_down);
>  
> +void __min_heap_sift_down_top_down(min_heap_char *heap, int pos, size_t elem_size,
> +				   const struct min_heap_callbacks *func, void *args)
> +{
> +	__min_heap_sift_down_top_down_inline(heap, pos, elem_size, func, args);
> +}
> +EXPORT_SYMBOL(__min_heap_sift_down_top_down);
> +
>  void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx,
>  			const struct min_heap_callbacks *func, void *args)
>  {
> -- 
> 2.50.0.rc1.591.g9c95f17f64-goog
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
  2025-06-06  7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
@ 2025-06-06 13:01   ` Kuan-Wei Chiu
  2025-06-10 12:44     ` Robert Pang
  0 siblings, 1 reply; 7+ messages in thread
From: Kuan-Wei Chiu @ 2025-06-06 13:01 UTC (permalink / raw)
  To: Robert Pang; +Cc: Coly Li, Kent Overstreet, linux-bcache

On Fri, Jun 06, 2025 at 12:19:45AM -0700, Robert Pang wrote:
> In commit "lib/min_heap: introduce non-inline versions of min heap API functions"
> (92a8b22), bcache migrates to the generic lib min_heap for all heap operations.
> This causes sizeable the tail IO latency regression during the cache replacement.

Nit: According to the documentation, I'd prefer referencing the commit
like this:

92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap
API functions")
https://docs.kernel.org/process/submitting-patches.html#describe-your-changes

Also, if the regression is caused by the heapify method, shouldn't the
commit that introduced it be 866898efbb25 ("bcache: remove heap-related
macros and switch to generic min_heap") ?

> 
> This commit updates invalidate_buckets_lru() to use the alternative APIs that
> sift down elements using the top-down approach like bcache's own original heap
> implementation.
> 
> [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533
> 
> Signed-off-by: Robert Pang <robertpang@google.com>
> ---
>  drivers/md/bcache/alloc.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
> index 8998e61efa40..547d1cd0c7c2 100644
> --- a/drivers/md/bcache/alloc.c
> +++ b/drivers/md/bcache/alloc.c
> @@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca)
>  		if (!bch_can_invalidate_bucket(ca, b))
>  			continue;
>  
> -		if (!min_heap_full(&ca->heap))
> -			min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
> -		else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
> +		if (!min_heap_full_inline(&ca->heap))
> +			min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca);

If the regression is caused by the heapify method rather than the
inline vs non-inline change, is it necessary to switch to the
non-inline version here?

Regards,
Kuan-Wei

> +		else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) {
>  			ca->heap.data[0] = b;
> -			min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
> +			min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca);
>  		}
>  	}
>  
> -	min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
> +	min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
>  
>  	while (!fifo_full(&ca->free_inc)) {
>  		if (!ca->heap.nr) {
> @@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca)
>  			wake_up_gc(ca->set);
>  			return;
>  		}
> -		b = min_heap_peek(&ca->heap)[0];
> -		min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
> +		b = min_heap_peek_inline(&ca->heap)[0];
> +		min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
>  
>  		bch_invalidate_one_bucket(ca, b);
>  	}
> -- 
> 2.50.0.rc1.591.g9c95f17f64-goog
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap
  2025-06-06 13:01   ` Kuan-Wei Chiu
@ 2025-06-10 12:44     ` Robert Pang
  0 siblings, 0 replies; 7+ messages in thread
From: Robert Pang @ 2025-06-10 12:44 UTC (permalink / raw)
  To: Kuan-Wei Chiu; +Cc: Coly Li, Kent Overstreet, linux-bcache

When I tested this patch series initially, merely switching to the
traditional top-down sift-down alone did not resolve the latency
regression fully. It requires both the top-down sift-down and
inlining together to match the original latency numbers from before the
migration to the lib/min_heap API. As I understand, the
invalidate_buckets_lru() is performance-critical and requires both
optimizations.

Best regards
Robert

On Fri, Jun 6, 2025 at 10:01 PM Kuan-Wei Chiu <visitorckw@gmail.com> wrote:
>
> On Fri, Jun 06, 2025 at 12:19:45AM -0700, Robert Pang wrote:
> > In commit "lib/min_heap: introduce non-inline versions of min heap API functions"
> > (92a8b22), bcache migrates to the generic lib min_heap for all heap operations.
> > This causes a sizeable tail IO latency regression during cache replacement.
>
> Nit: According to the documentation, I'd prefer referencing the commit
> like this:
>
> 92a8b224b833 ("lib/min_heap: introduce non-inline versions of min heap
> API functions")
> https://docs.kernel.org/process/submitting-patches.html#describe-your-changes
>
> Also, if the regression is caused by the heapify method, shouldn't the
> commit that introduced it be 866898efbb25 ("bcache: remove heap-related
> macros and switch to generic min_heap") ?
>
> >
> > This commit updates invalidate_buckets_lru() to use the alternative APIs that
> > sift down elements using the top-down approach like bcache's own original heap
> > implementation.
> >
> > [1] https://lore.kernel.org/linux-bcache/wtfuhfntbi6yorxqtpcs4vg5w67mvyckp2a6jmxuzt2hvbw65t@gznwsae5653d/T/#me50a9ddd0386ce602b2f17415e02d33b8e29f533
> >
> > Signed-off-by: Robert Pang <robertpang@google.com>
> > ---
> >  drivers/md/bcache/alloc.c | 14 +++++++-------
> >  1 file changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
> > index 8998e61efa40..547d1cd0c7c2 100644
> > --- a/drivers/md/bcache/alloc.c
> > +++ b/drivers/md/bcache/alloc.c
> > @@ -207,15 +207,15 @@ static void invalidate_buckets_lru(struct cache *ca)
> >               if (!bch_can_invalidate_bucket(ca, b))
> >                       continue;
> >
> > -             if (!min_heap_full(&ca->heap))
> > -                     min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
> > -             else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
> > +             if (!min_heap_full_inline(&ca->heap))
> > +                     min_heap_push_inline(&ca->heap, &b, &bucket_max_cmp_callback, ca);
>
> If the regression is caused by the heapify method rather than the
> inline vs non-inline change, is it necessary to switch to the
> non-inline version here?
>
> Regards,
> Kuan-Wei
>
> > +             else if (!new_bucket_max_cmp(&b, min_heap_peek_inline(&ca->heap), ca)) {
> >                       ca->heap.data[0] = b;
> > -                     min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
> > +                     min_heap_sift_down_top_down_inline(&ca->heap, 0, &bucket_max_cmp_callback, ca);
> >               }
> >       }
> >
> > -     min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
> > +     min_heapify_all_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
> >
> >       while (!fifo_full(&ca->free_inc)) {
> >               if (!ca->heap.nr) {
> > @@ -227,8 +227,8 @@ static void invalidate_buckets_lru(struct cache *ca)
> >                       wake_up_gc(ca->set);
> >                       return;
> >               }
> > -             b = min_heap_peek(&ca->heap)[0];
> > -             min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
> > +             b = min_heap_peek_inline(&ca->heap)[0];
> > +             min_heap_pop_top_down_inline(&ca->heap, &bucket_min_cmp_callback, ca);
> >
> >               bch_invalidate_one_bucket(ca, b);
> >       }
> > --
> > 2.50.0.rc1.591.g9c95f17f64-goog
> >

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-06-10 12:44 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-06  7:19 [PATCH 0/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
2025-06-06  7:19 ` [PATCH 1/3] lib min_heap: refactor min_heap to allow the alternative sift-down function to be used Robert Pang
2025-06-06  7:19 ` [PATCH 2/3] lib min_heap: add alternative APIs that use the conventional top-down strategy to sift down elements Robert Pang
2025-06-06 12:52   ` Kuan-Wei Chiu
2025-06-06  7:19 ` [PATCH 3/3] bcache: Fix the tail IO latency regression due to the use of lib min_heap Robert Pang
2025-06-06 13:01   ` Kuan-Wei Chiu
2025-06-10 12:44     ` Robert Pang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox