xor: add a kunit benchmark

Linux RAID subsystem development
 help / color / mirror / Atom feed

* xor: add a kunit benchmark
@ 2026-06-17  5:44 Christoph Hellwig
  2026-06-17  5:44 ` [PATCH 1/2] xor/kunit: fix a spelling error Christoph Hellwig
  2026-06-17  5:44 ` [PATCH 2/2] xor/kunit: add a benchmark Christoph Hellwig
  0 siblings, 2 replies; 5+ messages in thread
From: Christoph Hellwig @ 2026-06-17  5:44 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Eric Biggers, linux-kernel, linux-raid

From: Eric Biggers <ebiggers@kernel.org>

Hi all,

this series adds a benchmark modelled after the CRC benchmark for to the
xor kunit test.

This is based on current Linus' master so probably needs a minor rebase
for Kconfig conflicts once the raid6 series is merged.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] xor/kunit: fix a spelling error
  2026-06-17  5:44 xor: add a kunit benchmark Christoph Hellwig
@ 2026-06-17  5:44 ` Christoph Hellwig
  2026-06-17  5:44 ` [PATCH 2/2] xor/kunit: add a benchmark Christoph Hellwig
  1 sibling, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2026-06-17  5:44 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Eric Biggers, linux-kernel, linux-raid

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 lib/raid/xor/tests/xor_kunit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/raid/xor/tests/xor_kunit.c b/lib/raid/xor/tests/xor_kunit.c
index 0c2a3a420bf9..659ae3edbc25 100644
--- a/lib/raid/xor/tests/xor_kunit.c
+++ b/lib/raid/xor/tests/xor_kunit.c
@@ -85,7 +85,7 @@ static void xor_test(struct kunit *test)
 			xor_generate_random_data();
 
 		/*
-		 * If we're not using the entire buffer size, inject randomize
+		 * If we're not using the entire buffer size, inject randomized
 		 * alignment into the buffer.
 		 */
 		max_alignment = XOR_KUNIT_MAX_BYTES - len;
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] xor/kunit: add a benchmark
  2026-06-17  5:44 xor: add a kunit benchmark Christoph Hellwig
  2026-06-17  5:44 ` [PATCH 1/2] xor/kunit: fix a spelling error Christoph Hellwig
@ 2026-06-17  5:44 ` Christoph Hellwig
  2026-06-17  6:00   ` sashiko-bot
  2026-06-17 17:14   ` Eric Biggers
  1 sibling, 2 replies; 5+ messages in thread
From: Christoph Hellwig @ 2026-06-17  5:44 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Eric Biggers, linux-kernel, linux-raid

Add a benchmark to test the XOR functions for more representative block
sizes and numbers of disks.  Including 64k would be useful here, but
increasing the test buffer size increases the runtime of the functional
kunit test too much unfortunately.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 lib/raid/Kconfig               |  6 ++++
 lib/raid/xor/tests/xor_kunit.c | 63 ++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/lib/raid/Kconfig b/lib/raid/Kconfig
index 5ab2b0a7be4c..034969d240c6 100644
--- a/lib/raid/Kconfig
+++ b/lib/raid/Kconfig
@@ -28,3 +28,9 @@ config XOR_KUNIT_TEST
 
 	  This is intended to help people writing architecture-specific
 	  optimized versions.  If unsure, say N.
+
+config XOR_BENCHMARK
+	bool "Benchmark for xor_gen"
+	depends on XOR_KUNIT_TEST
+	help
+	  Include benchmarks in the KUnit test suite for xor_gen.
diff --git a/lib/raid/xor/tests/xor_kunit.c b/lib/raid/xor/tests/xor_kunit.c
index 659ae3edbc25..5939b78b3944 100644
--- a/lib/raid/xor/tests/xor_kunit.c
+++ b/lib/raid/xor/tests/xor_kunit.c
@@ -125,8 +125,71 @@ static void xor_test(struct kunit *test)
 	}
 }
 
+#ifdef CONFIG_XOR_BENCHMARK
+static void xor_benchmark(struct kunit *test)
+{
+	static const unsigned int nr_to_test[] = {
+		4, 5, 6, 7, 8, 10, 12, 15, 16, 32,
+	};
+	static const unsigned int len_to_test[] = {
+		SZ_4K, SZ_16K,
+	};
+	unsigned int i, j, l;
+	u64 t;
+
+	/* warm-up */
+	for (i = 0; i < ARRAY_SIZE(nr_to_test); i++) {
+		for (j = 0; j < ARRAY_SIZE(len_to_test); j++) {
+			for (l = 0; l < 10; l++) {
+				xor_gen(test_dest, test_buffers, nr_to_test[i],
+						len_to_test[j]);
+			}
+		}
+	}
+
+	/*
+	 * Preferably this would be a loop over len_to_test, but the kunit
+	 * logging always adds a newline to each logged format string.
+	 */
+	static_assert(ARRAY_SIZE(len_to_test) == 2);
+	kunit_info(test, "          \t%5u bytes\t%5u bytes\n",
+			len_to_test[0], len_to_test[1]);
+
+	for (i = 0; i < ARRAY_SIZE(nr_to_test); i++) {
+		unsigned int nr = nr_to_test[i];
+		u64 speed[ARRAY_SIZE(len_to_test)];
+
+		KUNIT_ASSERT_LE(test, nr, XOR_KUNIT_MAX_BUFFERS);
+
+		for (j = 0; j < ARRAY_SIZE(len_to_test); j++) {
+			unsigned int len = len_to_test[j];
+			const unsigned long num_iters = 1000;
+
+			KUNIT_ASSERT_GT(test, len, 0);
+			KUNIT_ASSERT_LE(test, len, XOR_KUNIT_MAX_BYTES);
+
+			preempt_disable();
+			t = ktime_get();
+			for (l = 0; l < num_iters; l++)
+				xor_gen(test_dest, test_buffers, nr, len);
+			t = ktime_get_ns() - t;
+			preempt_enable();
+
+			speed[j] = div_u64((u64)len * num_iters * nr, t);
+		}
+
+		static_assert(ARRAY_SIZE(len_to_test) == 2);
+		kunit_info(test, "%3u disks:\t%5llu  GB/s\t%5llu  GB/s\n",
+				nr, speed[0], speed[1]);
+	}
+}
+#endif /* CONFIG_XOR_BENCHMARK */
+
 static struct kunit_case xor_test_cases[] = {
 	KUNIT_CASE(xor_test),
+#ifdef CONFIG_XOR_BENCHMARK
+	KUNIT_CASE(xor_benchmark),
+#endif
 	{},
 };
 
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] xor/kunit: add a benchmark
  2026-06-17  5:44 ` [PATCH 2/2] xor/kunit: add a benchmark Christoph Hellwig
@ 2026-06-17  6:00   ` sashiko-bot
  2026-06-17 17:14   ` Eric Biggers
  1 sibling, 0 replies; 5+ messages in thread
From: sashiko-bot @ 2026-06-17  6:00 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-raid, yukuai

Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
- [High] The elapsed benchmark time `t` (a 64-bit nanosecond value) is passed as a 32-bit divisor to `div_u64()`, leading to implicit truncation for runs over 4.29 seconds, resulting in totally incorrect throughput metrics or a potential divide-by-zero kernel panic.
- [Medium] Executing a massive 1,000-iteration XOR processing loop synchronously with preemption disabled can cause RCU CPU stall warnings and softlockups on slower test environments.
- [Medium] The throughput computation calculates Gigabytes per second (GB/s) using integer division, which unconditionally truncates any throughput slower than 1 GB/s down to 0 GB/s.
--

commit facfbbbac101882a52a1c7a2e6d86cb1173448ed
Author: Christoph Hellwig <hch@lst.de>

xor/kunit: add a benchmark

This commit adds a benchmark to test the XOR functions for more representative
block sizes and numbers of disks. It includes 4k and 16k sizes, as increasing
the test buffer size further would increase the functional kunit test runtime
too much.

> diff --git a/lib/raid/xor/tests/xor_kunit.c b/lib/raid/xor/tests/xor_kunit.c
> index 659ae3edbc25b..5939b78b39446 100644
> --- a/lib/raid/xor/tests/xor_kunit.c
> +++ b/lib/raid/xor/tests/xor_kunit.c

[ ... ]

> @@ -125,8 +125,71 @@ static void xor_test(struct kunit *test)
> +		for (j = 0; j < ARRAY_SIZE(len_to_test); j++) {
> +			unsigned int len = len_to_test[j];
> +			const unsigned long num_iters = 1000;
> +
> +			KUNIT_ASSERT_GT(test, len, 0);
> +			KUNIT_ASSERT_LE(test, len, XOR_KUNIT_MAX_BYTES);
> +
> +			preempt_disable();
> +			t = ktime_get();
> +			for (l = 0; l < num_iters; l++)
> +				xor_gen(test_dest, test_buffers, nr, len);
> +			t = ktime_get_ns() - t;
> +			preempt_enable();

[Severity: Medium]
Will this massive 1,000-iteration XOR loop cause RCU CPU stall warnings or
trigger the softlockup watchdog on slower environments?

In the worst-case configuration, this processes 512MB of memory synchronously
with preemption completely disabled. On unaccelerated CI environments or
low-power CPUs, processing this amount of data can take tens of seconds, which
violates latency guarantees.

Should preemption be enabled periodically, or the number of iterations scaled
dynamically?

> +
> +			speed[j] = div_u64((u64)len * num_iters * nr, t);

[Severity: High]
Does this code silently truncate the 64-bit elapsed time t when dividing?

div_u64() expects a 32-bit unsigned integer as its divisor. The elapsed time
t is a 64-bit nanosecond value. If the execution takes longer than 4.29
seconds (U32_MAX nanoseconds), the upper 32 bits of t will be implicitly
truncated, corrupting the calculation.

Worse, if the lower 32 bits happen to truncate to exactly 0, this will trigger
a division by zero kernel panic.

Should this use div64_u64() instead?

[Severity: Medium]
Will this integer division unconditionally truncate any throughput slower than
1 GB/s down to 0 GB/s?

Mathematically, computing bytes per nanosecond is directly equivalent to
computing GB/s. Because this uses integer division without scaling the
numerator, any throughput below 1 GB/s will evaluate to exactly 0. On
processors without SIMD acceleration or in VMs, this might falsely log 0 GB/s.

Could the numerator be multiplied by 1,000 to output MB/s and preserve useful
precision?

> +		}
> +
> +		static_assert(ARRAY_SIZE(len_to_test) == 2);
> +		kunit_info(test, "%3u disks:\t%5llu  GB/s\t%5llu  GB/s\n",
> +				nr, speed[0], speed[1]);
> +	}
> +}
> +#endif /* CONFIG_XOR_BENCHMARK */

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260617054416.3841334-1-hch@lst.de?part=2

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] xor/kunit: add a benchmark
  2026-06-17  5:44 ` [PATCH 2/2] xor/kunit: add a benchmark Christoph Hellwig
  2026-06-17  6:00   ` sashiko-bot
@ 2026-06-17 17:14   ` Eric Biggers
  1 sibling, 0 replies; 5+ messages in thread
From: Eric Biggers @ 2026-06-17 17:14 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Andrew Morton, linux-kernel, linux-raid

On Wed, Jun 17, 2026 at 07:44:04AM +0200, Christoph Hellwig wrote:
> diff --git a/lib/raid/xor/tests/xor_kunit.c b/lib/raid/xor/tests/xor_kunit.c
> index 659ae3edbc25..5939b78b3944 100644
> --- a/lib/raid/xor/tests/xor_kunit.c
> +++ b/lib/raid/xor/tests/xor_kunit.c
> @@ -125,8 +125,71 @@ static void xor_test(struct kunit *test)
>  	}
>  }
>  
> +#ifdef CONFIG_XOR_BENCHMARK
> +static void xor_benchmark(struct kunit *test)

The #ifdef can be avoided using kunit_skip(), as the crypto and CRC
tests do:

    if (!IS_ENABLED(CONFIG_XOR_BENCHMARK))
            kunit_skip(test, "not enabled");

> +		for (j = 0; j < ARRAY_SIZE(len_to_test); j++) {
> +			unsigned int len = len_to_test[j];
> +			const unsigned long num_iters = 1000;
> +
> +			KUNIT_ASSERT_GT(test, len, 0);
> +			KUNIT_ASSERT_LE(test, len, XOR_KUNIT_MAX_BYTES);
> +
> +			preempt_disable();
> +			t = ktime_get();
> +			for (l = 0; l < num_iters; l++)
> +				xor_gen(test_dest, test_buffers, nr, len);
> +			t = ktime_get_ns() - t;
> +			preempt_enable();

First one should be ktime_get_ns(), not ktime_get().

> +
> +			speed[j] = div_u64((u64)len * num_iters * nr, t);
> +		}
> +
> +		static_assert(ARRAY_SIZE(len_to_test) == 2);
> +		kunit_info(test, "%3u disks:\t%5llu  GB/s\t%5llu  GB/s\n",
> +				nr, speed[0], speed[1]);

As mentioned in the other thread, this measures the speed at which the
source data is consumed, which differs from the code in
lib/raid/xor/xor-core.c that measures the speed at which the destination
data is produced.  Probably best to make them consistent.

- Eric

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-06-17 17:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-17  5:44 xor: add a kunit benchmark Christoph Hellwig
2026-06-17  5:44 ` [PATCH 1/2] xor/kunit: fix a spelling error Christoph Hellwig
2026-06-17  5:44 ` [PATCH 2/2] xor/kunit: add a benchmark Christoph Hellwig
2026-06-17  6:00   ` sashiko-bot
2026-06-17 17:14   ` Eric Biggers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox