All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kuan-Wei Chiu <visitorckw@gmail.com>
To: colyli@suse.de, kent.overstreet@linux.dev
Cc: bfoster@redhat.com, jserv@ccns.ncku.edu.tw,
	linux-bcache@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-bcachefs@vger.kernel.org,
	Kuan-Wei Chiu <visitorckw@gmail.com>
Subject: [PATCH 1/5] bcachefs: Optimize eytzinger0_sort() using bottom-up heapsort
Date: Sun, 21 Jan 2024 23:36:45 +0800	[thread overview]
Message-ID: <20240121153649.2733274-2-visitorckw@gmail.com> (raw)
In-Reply-To: <20240121153649.2733274-1-visitorckw@gmail.com>

This optimization reduces the average number of comparisons required
from 2*n*log2(n) - 3*n + o(n) to n*log2(n) + 0.37*n + o(n). When n is
sufficiently large, it results in approximately 50% fewer comparisons.

Currently, eytzinger0_sort employs the textbook version of heapsort,
where during the heapify process, each level requires two comparisons
to determine the maximum among three elements. In contrast, the
bottom-up heapsort, during heapify, only compares two children at each
level until reaching a leaf node. Then, it backtracks from the leaf
node to find the correct position. Since heapify typically continues
until very close to the leaf node, the standard heapify requires about
2*log2(n) comparisons, while the bottom-up variant only needs log2(n)
comparisons.

The experimental data presented below is based on an array generated
by get_random_u32().

|   N   | comparisons(old) | comparisons(new) | time(old) | time(new) |
|-------|------------------|------------------|-----------|-----------|
| 10000 |     235381       |     136615       |  25545 us |  20366 us |
| 20000 |     510694       |     293425       |  31336 us |  18312 us |
| 30000 |     800384       |     457412       |  35042 us |  27386 us |
| 40000 |    1101617       |     626831       |  48779 us |  38253 us |
| 50000 |    1409762       |     799637       |  62238 us |  46950 us |
| 60000 |    1721191       |     974521       |  75588 us |  58367 us |
| 70000 |    2038536       |    1152171       |  90823 us |  68778 us |
| 80000 |    2362958       |    1333472       | 104165 us |  78625 us |
| 90000 |    2690900       |    1516065       | 116111 us |  89573 us |
| 100000|    3019413       |    1699879       | 133638 us | 100998 us |

Refs:
  BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
  QUICKSORT (if n is not very small)
  Ingo Wegener
  Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
  https://doi.org/10.1016/0304-3975(93)90364-Y

Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
This patch has undergone unit testing and micro benchmarking using the
following code [1].

[1]:
static long long int cmp_count = 0;

static int mycmp(const void *a, const void *b, size_t size)
{
    u32 _a = *(u32 *)a;
	u32 _b = *(u32 *)b;

	cmp_count++;
	if (_a < _b)
		return -1;
	else if (_a > _b)
		return 1;
	else
		return 0;
}

static int test(void)
{
    size_t N, i, L, R;
	ktime_t start, end;
	s64 delta;
    u32 *arr;

	for (N = 10000; N <= 100000; N += 10000) {
		arr = kmalloc_array(N, sizeof(u32), GFP_KERNEL);
		cmp_count = 0;

		for (i = 0; i < N; i++)
			arr[i] = get_random_u32();
		
		start = ktime_get();
		eytzinger0_sort(arr, N, sizeof(u32), mycmp, NULL);
		end = ktime_get();

		delta = ktime_us_delta(end, start);
        printk(KERN_INFO "time: %lld\n", delta);
		printk(KERN_INFO "comparisons: %lld\n", cmp_count);

		for (int i = 0; i < N; i++) {
            L = 2 * i + 1;
            R = 2 * i + 2;
            if (L < N && arr[i] < arr[L])
                goto err;
            if (R < N && arr[i] > arr[R])
                goto err;
        }

		kfree(arr);
	}
    return 0;

err:
    kfree(arr);
    return -1;
}

 fs/bcachefs/util.c | 50 +++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index c2ef7cddaa4f..bbc83b43162e 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -911,7 +911,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
 		     int (*cmp_func)(const void *, const void *, size_t),
 		     void (*swap_func)(void *, void *, size_t))
 {
-	int i, c, r;
+	int i, j, k;
 
 	if (!swap_func) {
 		if (size == 4 && alignment_ok(base, 4))
@@ -924,17 +924,22 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
 
 	/* heapify */
 	for (i = n / 2 - 1; i >= 0; --i) {
-		for (r = i; r * 2 + 1 < n; r = c) {
-			c = r * 2 + 1;
-
-			if (c + 1 < n &&
-			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-				c++;
-
-			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-				break;
-
-			do_swap(base, n, size, swap_func, r, c);
+		/* Find the sift-down path all the way to the leaves. */
+		for (j = i; k = j * 2 + 1, k + 1 < n;)
+			j = do_cmp(base, n, size, cmp_func, k, k + 1) > 0 ? k : k + 1;
+
+		/* Special case for the last leaf with no sibling. */
+		if (j * 2 + 2 == n)
+			j = j * 2 + 1;
+
+		/* Backtrack to the correct location. */
+		while (j != i && do_cmp(base, n, size, cmp_func, i, j) >= 0)
+			j = (j - 1) / 2;
+
+		/* Shift the element into its correct place. */
+		for (k = j; j != i;) {
+			j = (j - 1) / 2;
+			do_swap(base, n, size, swap_func, j, k);
 		}
 	}
 
@@ -942,17 +947,22 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
 	for (i = n - 1; i > 0; --i) {
 		do_swap(base, n, size, swap_func, 0, i);
 
-		for (r = 0; r * 2 + 1 < i; r = c) {
-			c = r * 2 + 1;
+		/* Find the sift-down path all the way to the leaves. */
+		for (j = 0; k = j * 2 + 1, k + 1 < i;)
+			j = do_cmp(base, n, size, cmp_func, k, k + 1) > 0 ? k : k + 1;
 
-			if (c + 1 < i &&
-			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-				c++;
+		/* Special case for the last leaf with no sibling. */
+		if (j * 2 + 2 == i)
+			j = j * 2 + 1;
 
-			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-				break;
+		/* Backtrack to the correct location. */
+		while (j && do_cmp(base, n, size, cmp_func, 0, j) >= 0)
+			j = (j - 1) / 2;
 
-			do_swap(base, n, size, swap_func, r, c);
+		/* Shift the element into its correct place. */
+		for (k = j; j;) {
+			j = (j - 1) / 2;
+			do_swap(base, n, size, swap_func, j, k);
 		}
 	}
 }
-- 
2.25.1


  reply	other threads:[~2024-01-21 15:37 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-21 15:36 [PATCH 0/5] Optimize number of comparisons for heap/heapsort implementaion Kuan-Wei Chiu
2024-01-21 15:36 ` Kuan-Wei Chiu [this message]
2024-01-21 15:36 ` [PATCH 2/5] bcachefs: Introduce parent function for sort_cmp_size() Kuan-Wei Chiu
2024-01-21 16:17   ` Kent Overstreet
2024-01-21 17:05     ` Kuan-Wei Chiu
2024-01-21 17:37       ` Kent Overstreet
2024-01-21 15:36 ` [PATCH 3/5] bcachefs: Optimize sort_cmp_size() using bottom-up heapsort Kuan-Wei Chiu
2024-01-21 15:36 ` [PATCH 4/5] bcachefs: Optimize number of comparisons in heap_sift_down Kuan-Wei Chiu
2024-01-21 15:36 ` [PATCH 5/5] bcache: Optimize number of comparisons in heap_sift Kuan-Wei Chiu
2024-01-21 16:21 ` [PATCH 0/5] Optimize number of comparisons for heap/heapsort implementaion Kent Overstreet
2024-01-21 16:55   ` Kuan-Wei Chiu
2024-01-21 17:41     ` Kent Overstreet
2024-01-22 15:06       ` Kuan-Wei Chiu
2024-01-22 16:06         ` Kent Overstreet
2024-01-22 16:23           ` Kuan-Wei Chiu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240121153649.2733274-2-visitorckw@gmail.com \
    --to=visitorckw@gmail.com \
    --cc=bfoster@redhat.com \
    --cc=colyli@suse.de \
    --cc=jserv@ccns.ncku.edu.tw \
    --cc=kent.overstreet@linux.dev \
    --cc=linux-bcache@vger.kernel.org \
    --cc=linux-bcachefs@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.