From: Kuan-Wei Chiu <visitorckw@gmail.com>
To: colyli@suse.de, kent.overstreet@linux.dev
Cc: bfoster@redhat.com, jserv@ccns.ncku.edu.tw,
linux-bcache@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-bcachefs@vger.kernel.org,
Kuan-Wei Chiu <visitorckw@gmail.com>
Subject: [PATCH 1/5] bcachefs: Optimize eytzinger0_sort() using bottom-up heapsort
Date: Sun, 21 Jan 2024 23:36:45 +0800 [thread overview]
Message-ID: <20240121153649.2733274-2-visitorckw@gmail.com> (raw)
In-Reply-To: <20240121153649.2733274-1-visitorckw@gmail.com>
This optimization reduces the average number of comparisons required
from 2*n*log2(n) - 3*n + o(n) to n*log2(n) + 0.37*n + o(n). When n is
sufficiently large, it results in approximately 50% fewer comparisons.
Currently, eytzinger0_sort employs the textbook version of heapsort,
where during the heapify process, each level requires two comparisons
to determine the maximum among three elements. In contrast, the
bottom-up heapsort, during heapify, only compares two children at each
level until reaching a leaf node. Then, it backtracks from the leaf
node to find the correct position. Since heapify typically continues
until very close to the leaf node, the standard heapify requires about
2*log2(n) comparisons, while the bottom-up variant only needs log2(n)
comparisons.
The experimental data presented below is based on an array generated
by get_random_u32().
| N | comparisons(old) | comparisons(new) | time(old) | time(new) |
|-------|------------------|------------------|-----------|-----------|
| 10000 | 235381 | 136615 | 25545 us | 20366 us |
| 20000 | 510694 | 293425 | 31336 us | 18312 us |
| 30000 | 800384 | 457412 | 35042 us | 27386 us |
| 40000 | 1101617 | 626831 | 48779 us | 38253 us |
| 50000 | 1409762 | 799637 | 62238 us | 46950 us |
| 60000 | 1721191 | 974521 | 75588 us | 58367 us |
| 70000 | 2038536 | 1152171 | 90823 us | 68778 us |
| 80000 | 2362958 | 1333472 | 104165 us | 78625 us |
| 90000 | 2690900 | 1516065 | 116111 us | 89573 us |
| 100000| 3019413 | 1699879 | 133638 us | 100998 us |
Refs:
BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
QUICKSORT (if n is not very small)
Ingo Wegener
Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
https://doi.org/10.1016/0304-3975(93)90364-Y
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
This patch has undergone unit testing and micro benchmarking using the
following code [1].
[1]:
static long long int cmp_count = 0;
static int mycmp(const void *a, const void *b, size_t size)
{
u32 _a = *(u32 *)a;
u32 _b = *(u32 *)b;
cmp_count++;
if (_a < _b)
return -1;
else if (_a > _b)
return 1;
else
return 0;
}
static int test(void)
{
size_t N, i, L, R;
ktime_t start, end;
s64 delta;
u32 *arr;
for (N = 10000; N <= 100000; N += 10000) {
arr = kmalloc_array(N, sizeof(u32), GFP_KERNEL);
cmp_count = 0;
for (i = 0; i < N; i++)
arr[i] = get_random_u32();
start = ktime_get();
eytzinger0_sort(arr, N, sizeof(u32), mycmp, NULL);
end = ktime_get();
delta = ktime_us_delta(end, start);
printk(KERN_INFO "time: %lld\n", delta);
printk(KERN_INFO "comparisons: %lld\n", cmp_count);
for (int i = 0; i < N; i++) {
L = 2 * i + 1;
R = 2 * i + 2;
if (L < N && arr[i] < arr[L])
goto err;
if (R < N && arr[i] > arr[R])
goto err;
}
kfree(arr);
}
return 0;
err:
kfree(arr);
return -1;
}
fs/bcachefs/util.c | 50 +++++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 20 deletions(-)
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index c2ef7cddaa4f..bbc83b43162e 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -911,7 +911,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
int (*cmp_func)(const void *, const void *, size_t),
void (*swap_func)(void *, void *, size_t))
{
- int i, c, r;
+ int i, j, k;
if (!swap_func) {
if (size == 4 && alignment_ok(base, 4))
@@ -924,17 +924,22 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
/* heapify */
for (i = n / 2 - 1; i >= 0; --i) {
- for (r = i; r * 2 + 1 < n; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < n &&
- do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
- c++;
-
- if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
- break;
-
- do_swap(base, n, size, swap_func, r, c);
+ /* Find the sift-down path all the way to the leaves. */
+ for (j = i; k = j * 2 + 1, k + 1 < n;)
+ j = do_cmp(base, n, size, cmp_func, k, k + 1) > 0 ? k : k + 1;
+
+ /* Special case for the last leaf with no sibling. */
+ if (j * 2 + 2 == n)
+ j = j * 2 + 1;
+
+ /* Backtrack to the correct location. */
+ while (j != i && do_cmp(base, n, size, cmp_func, i, j) >= 0)
+ j = (j - 1) / 2;
+
+ /* Shift the element into its correct place. */
+ for (k = j; j != i;) {
+ j = (j - 1) / 2;
+ do_swap(base, n, size, swap_func, j, k);
}
}
@@ -942,17 +947,22 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
for (i = n - 1; i > 0; --i) {
do_swap(base, n, size, swap_func, 0, i);
- for (r = 0; r * 2 + 1 < i; r = c) {
- c = r * 2 + 1;
+ /* Find the sift-down path all the way to the leaves. */
+ for (j = 0; k = j * 2 + 1, k + 1 < i;)
+ j = do_cmp(base, n, size, cmp_func, k, k + 1) > 0 ? k : k + 1;
- if (c + 1 < i &&
- do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
- c++;
+ /* Special case for the last leaf with no sibling. */
+ if (j * 2 + 2 == i)
+ j = j * 2 + 1;
- if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
- break;
+ /* Backtrack to the correct location. */
+ while (j && do_cmp(base, n, size, cmp_func, 0, j) >= 0)
+ j = (j - 1) / 2;
- do_swap(base, n, size, swap_func, r, c);
+ /* Shift the element into its correct place. */
+ for (k = j; j;) {
+ j = (j - 1) / 2;
+ do_swap(base, n, size, swap_func, j, k);
}
}
}
--
2.25.1
next prev parent reply other threads:[~2024-01-21 15:37 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-21 15:36 [PATCH 0/5] Optimize number of comparisons for heap/heapsort implementaion Kuan-Wei Chiu
2024-01-21 15:36 ` Kuan-Wei Chiu [this message]
2024-01-21 15:36 ` [PATCH 2/5] bcachefs: Introduce parent function for sort_cmp_size() Kuan-Wei Chiu
2024-01-21 16:17 ` Kent Overstreet
2024-01-21 17:05 ` Kuan-Wei Chiu
2024-01-21 17:37 ` Kent Overstreet
2024-01-21 15:36 ` [PATCH 3/5] bcachefs: Optimize sort_cmp_size() using bottom-up heapsort Kuan-Wei Chiu
2024-01-21 15:36 ` [PATCH 4/5] bcachefs: Optimize number of comparisons in heap_sift_down Kuan-Wei Chiu
2024-01-21 15:36 ` [PATCH 5/5] bcache: Optimize number of comparisons in heap_sift Kuan-Wei Chiu
2024-01-21 16:21 ` [PATCH 0/5] Optimize number of comparisons for heap/heapsort implementaion Kent Overstreet
2024-01-21 16:55 ` Kuan-Wei Chiu
2024-01-21 17:41 ` Kent Overstreet
2024-01-22 15:06 ` Kuan-Wei Chiu
2024-01-22 16:06 ` Kent Overstreet
2024-01-22 16:23 ` Kuan-Wei Chiu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240121153649.2733274-2-visitorckw@gmail.com \
--to=visitorckw@gmail.com \
--cc=bfoster@redhat.com \
--cc=colyli@suse.de \
--cc=jserv@ccns.ncku.edu.tw \
--cc=kent.overstreet@linux.dev \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-bcachefs@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.