* [PATCH v5 1/8] lib/string_kunit: add correctness test for strlen()
2026-01-27 1:25 [PATCH v5 0/8] riscv: optimize string functions and add kunit tests Feng Jiang
@ 2026-01-27 1:25 ` Feng Jiang
2026-01-28 22:39 ` Kees Cook
2026-01-27 1:25 ` [PATCH v5 2/8] lib/string_kunit: add correctness test for strnlen() Feng Jiang
` (6 subsequent siblings)
7 siblings, 1 reply; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 1:25 UTC (permalink / raw)
To: pjw, palmer, aou, alex, akpm, kees, andy, jiangfeng, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan
Cc: linux-riscv, linux-kernel, linux-hardening, Joel Stanley
Add a KUnit test for strlen() to verify correctness across
different string lengths and memory alignments.
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
Acked-by: Andy Shevchenko <andy@kernel.org>
Tested-by: Joel Stanley <joel@jms.id.au>
---
lib/tests/string_kunit.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
index f9a8e557ba77..bc5130c6e5e9 100644
--- a/lib/tests/string_kunit.c
+++ b/lib/tests/string_kunit.c
@@ -17,6 +17,9 @@
#define STRCMP_TEST_EXPECT_LOWER(test, fn, ...) KUNIT_EXPECT_LT(test, fn(__VA_ARGS__), 0)
#define STRCMP_TEST_EXPECT_GREATER(test, fn, ...) KUNIT_EXPECT_GT(test, fn(__VA_ARGS__), 0)
+#define STRING_TEST_MAX_LEN 128
+#define STRING_TEST_MAX_OFFSET 16
+
static void string_test_memset16(struct kunit *test)
{
unsigned i, j, k;
@@ -104,6 +107,28 @@ static void string_test_memset64(struct kunit *test)
}
}
+static void string_test_strlen(struct kunit *test)
+{
+ const size_t buf_size = STRING_TEST_MAX_LEN + STRING_TEST_MAX_OFFSET + 1;
+ size_t len, offset;
+ char *s;
+
+ s = kunit_kzalloc(test, buf_size, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, s);
+
+ memset(s, 'A', buf_size);
+ s[buf_size - 1] = '\0';
+
+ for (offset = 0; offset < STRING_TEST_MAX_OFFSET; offset++) {
+ for (len = 0; len <= STRING_TEST_MAX_LEN; len++) {
+ s[offset + len] = '\0';
+ KUNIT_EXPECT_EQ_MSG(test, strlen(s + offset), len,
+ "offset:%zu len:%zu", offset, len);
+ s[offset + len] = 'A';
+ }
+ }
+}
+
static void string_test_strchr(struct kunit *test)
{
const char *test_string = "abcdefghijkl";
@@ -618,6 +643,7 @@ static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_test_memset16),
KUNIT_CASE(string_test_memset32),
KUNIT_CASE(string_test_memset64),
+ KUNIT_CASE(string_test_strlen),
KUNIT_CASE(string_test_strchr),
KUNIT_CASE(string_test_strnchr),
KUNIT_CASE(string_test_strspn),
--
2.25.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v5 1/8] lib/string_kunit: add correctness test for strlen()
2026-01-27 1:25 ` [PATCH v5 1/8] lib/string_kunit: add correctness test for strlen() Feng Jiang
@ 2026-01-28 22:39 ` Kees Cook
2026-01-29 2:19 ` Feng Jiang
0 siblings, 1 reply; 17+ messages in thread
From: Kees Cook @ 2026-01-28 22:39 UTC (permalink / raw)
To: Feng Jiang
Cc: pjw, palmer, aou, alex, akpm, andy, ebiggers, martin.petersen,
sohil.mehta, charlie, conor.dooley, samuel.holland, linus.walleij,
nathan, linux-riscv, linux-kernel, linux-hardening, Joel Stanley
On Tue, Jan 27, 2026 at 09:25:51AM +0800, Feng Jiang wrote:
> Add a KUnit test for strlen() to verify correctness across
> different string lengths and memory alignments.
>
> Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
> Acked-by: Andy Shevchenko <andy@kernel.org>
> Tested-by: Joel Stanley <joel@jms.id.au>
> ---
> lib/tests/string_kunit.c | 26 ++++++++++++++++++++++++++
> 1 file changed, 26 insertions(+)
>
> diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
> index f9a8e557ba77..bc5130c6e5e9 100644
> --- a/lib/tests/string_kunit.c
> +++ b/lib/tests/string_kunit.c
> @@ -17,6 +17,9 @@
> #define STRCMP_TEST_EXPECT_LOWER(test, fn, ...) KUNIT_EXPECT_LT(test, fn(__VA_ARGS__), 0)
> #define STRCMP_TEST_EXPECT_GREATER(test, fn, ...) KUNIT_EXPECT_GT(test, fn(__VA_ARGS__), 0)
>
> +#define STRING_TEST_MAX_LEN 128
> +#define STRING_TEST_MAX_OFFSET 16
> +
> static void string_test_memset16(struct kunit *test)
> {
> unsigned i, j, k;
> @@ -104,6 +107,28 @@ static void string_test_memset64(struct kunit *test)
> }
> }
>
> +static void string_test_strlen(struct kunit *test)
> +{
> + const size_t buf_size = STRING_TEST_MAX_LEN + STRING_TEST_MAX_OFFSET + 1;
> + size_t len, offset;
> + char *s;
> +
> + s = kunit_kzalloc(test, buf_size, GFP_KERNEL);
One aspect of "correctness" that we might want to include here is making
sure we don't have any implementations that over-read. To that end,
perhaps this test can put the string at the end of a vmalloc allocation
(so that the end of the string is right up against an unallocated memory
space).
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, s);
> +
> + memset(s, 'A', buf_size);
> + s[buf_size - 1] = '\0';
> +
> + for (offset = 0; offset < STRING_TEST_MAX_OFFSET; offset++) {
> + for (len = 0; len <= STRING_TEST_MAX_LEN; len++) {
> + s[offset + len] = '\0';
> + KUNIT_EXPECT_EQ_MSG(test, strlen(s + offset), len,
> + "offset:%zu len:%zu", offset, len);
> + s[offset + len] = 'A';
> + }
> + }
> +}
It would require building the string backwards here. Or maybe we just
need a separate test for the over-read concerns?
Thoughts?
-Kees
--
Kees Cook
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v5 1/8] lib/string_kunit: add correctness test for strlen()
2026-01-28 22:39 ` Kees Cook
@ 2026-01-29 2:19 ` Feng Jiang
0 siblings, 0 replies; 17+ messages in thread
From: Feng Jiang @ 2026-01-29 2:19 UTC (permalink / raw)
To: Kees Cook
Cc: pjw, palmer, aou, alex, akpm, andy, ebiggers, martin.petersen,
sohil.mehta, charlie, conor.dooley, samuel.holland, linus.walleij,
nathan, linux-riscv, linux-kernel, linux-hardening, Joel Stanley
On 2026/1/29 06:39, Kees Cook wrote:
> On Tue, Jan 27, 2026 at 09:25:51AM +0800, Feng Jiang wrote:
>> Add a KUnit test for strlen() to verify correctness across
>> different string lengths and memory alignments.
>>
>> Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
>> Acked-by: Andy Shevchenko <andy@kernel.org>
>> Tested-by: Joel Stanley <joel@jms.id.au>
>> ---
>> lib/tests/string_kunit.c | 26 ++++++++++++++++++++++++++
>> 1 file changed, 26 insertions(+)
>>
>> diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
>> index f9a8e557ba77..bc5130c6e5e9 100644
>> --- a/lib/tests/string_kunit.c
>> +++ b/lib/tests/string_kunit.c
>> @@ -17,6 +17,9 @@
>> #define STRCMP_TEST_EXPECT_LOWER(test, fn, ...) KUNIT_EXPECT_LT(test, fn(__VA_ARGS__), 0)
>> #define STRCMP_TEST_EXPECT_GREATER(test, fn, ...) KUNIT_EXPECT_GT(test, fn(__VA_ARGS__), 0)
>>
>> +#define STRING_TEST_MAX_LEN 128
>> +#define STRING_TEST_MAX_OFFSET 16
>> +
>> static void string_test_memset16(struct kunit *test)
>> {
>> unsigned i, j, k;
>> @@ -104,6 +107,28 @@ static void string_test_memset64(struct kunit *test)
>> }
>> }
>>
>> +static void string_test_strlen(struct kunit *test)
>> +{
>> + const size_t buf_size = STRING_TEST_MAX_LEN + STRING_TEST_MAX_OFFSET + 1;
>> + size_t len, offset;
>> + char *s;
>> +
>> + s = kunit_kzalloc(test, buf_size, GFP_KERNEL);
>
> One aspect of "correctness" that we might want to include here is making
> sure we don't have any implementations that over-read. To that end,
> perhaps this test can put the string at the end of a vmalloc allocation
> (so that the end of the string is right up against an unallocated memory
> space).
>
>> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, s);
>> +
>> + memset(s, 'A', buf_size);
>> + s[buf_size - 1] = '\0';
>> +
>> + for (offset = 0; offset < STRING_TEST_MAX_OFFSET; offset++) {
>> + for (len = 0; len <= STRING_TEST_MAX_LEN; len++) {
>> + s[offset + len] = '\0';
>> + KUNIT_EXPECT_EQ_MSG(test, strlen(s + offset), len,
>> + "offset:%zu len:%zu", offset, len);
>> + s[offset + len] = 'A';
>> + }
>> + }
>> +}
>
> It would require building the string backwards here. Or maybe we just
> need a separate test for the over-read concerns?
>
> Thoughts?
Thanks for the suggestion! That is a very effective way to catch potential
over-reads in optimized implementations.
I will refactor the correctness tests in v6 to use a vmalloc-allocated page
and ensure the NUL character is positioned at the very end of the allocation
boundary.
I'll send out the v6 patch set shortly with these changes.
--
With Best Regards,
Feng Jiang
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 2/8] lib/string_kunit: add correctness test for strnlen()
2026-01-27 1:25 [PATCH v5 0/8] riscv: optimize string functions and add kunit tests Feng Jiang
2026-01-27 1:25 ` [PATCH v5 1/8] lib/string_kunit: add correctness test for strlen() Feng Jiang
@ 2026-01-27 1:25 ` Feng Jiang
2026-01-27 1:25 ` [PATCH v5 3/8] lib/string_kunit: add correctness test for strrchr() Feng Jiang
` (5 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 1:25 UTC (permalink / raw)
To: pjw, palmer, aou, alex, akpm, kees, andy, jiangfeng, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan
Cc: linux-riscv, linux-kernel, linux-hardening, Joel Stanley
Add a KUnit test for strnlen() to verify correctness across
different string lengths and memory alignments.
Suggested-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
Acked-by: Andy Shevchenko <andy@kernel.org>
Tested-by: Joel Stanley <joel@jms.id.au>
---
lib/tests/string_kunit.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
index bc5130c6e5e9..991f01d52eb2 100644
--- a/lib/tests/string_kunit.c
+++ b/lib/tests/string_kunit.c
@@ -129,6 +129,38 @@ static void string_test_strlen(struct kunit *test)
}
}
+static void string_test_strnlen(struct kunit *test)
+{
+ const size_t buf_size = STRING_TEST_MAX_LEN + STRING_TEST_MAX_OFFSET + 1;
+ size_t len, offset;
+ char *s;
+
+ s = kunit_kzalloc(test, buf_size, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, s);
+
+ memset(s, 'A', buf_size);
+ s[buf_size - 1] = '\0';
+
+ for (offset = 0; offset < STRING_TEST_MAX_OFFSET; offset++) {
+ for (len = 0; len <= STRING_TEST_MAX_LEN; len++) {
+ s[offset + len] = '\0';
+
+ if (len > 0)
+ KUNIT_EXPECT_EQ(test, strnlen(s + offset, len - 1), len - 1);
+ if (len > 1)
+ KUNIT_EXPECT_EQ(test, strnlen(s + offset, len - 2), len - 2);
+
+ KUNIT_EXPECT_EQ(test, strnlen(s + offset, len), len);
+
+ KUNIT_EXPECT_EQ(test, strnlen(s + offset, len + 1), len);
+ KUNIT_EXPECT_EQ(test, strnlen(s + offset, len + 2), len);
+ KUNIT_EXPECT_EQ(test, strnlen(s + offset, len + 10), len);
+
+ s[offset + len] = 'A';
+ }
+ }
+}
+
static void string_test_strchr(struct kunit *test)
{
const char *test_string = "abcdefghijkl";
@@ -644,6 +676,7 @@ static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_test_memset32),
KUNIT_CASE(string_test_memset64),
KUNIT_CASE(string_test_strlen),
+ KUNIT_CASE(string_test_strnlen),
KUNIT_CASE(string_test_strchr),
KUNIT_CASE(string_test_strnchr),
KUNIT_CASE(string_test_strspn),
--
2.25.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 3/8] lib/string_kunit: add correctness test for strrchr()
2026-01-27 1:25 [PATCH v5 0/8] riscv: optimize string functions and add kunit tests Feng Jiang
2026-01-27 1:25 ` [PATCH v5 1/8] lib/string_kunit: add correctness test for strlen() Feng Jiang
2026-01-27 1:25 ` [PATCH v5 2/8] lib/string_kunit: add correctness test for strnlen() Feng Jiang
@ 2026-01-27 1:25 ` Feng Jiang
2026-01-27 1:25 ` [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen() Feng Jiang
` (4 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 1:25 UTC (permalink / raw)
To: pjw, palmer, aou, alex, akpm, kees, andy, jiangfeng, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan
Cc: linux-riscv, linux-kernel, linux-hardening, Joel Stanley
Introduce a KUnit test to verify strrchr() across various memory
alignments and character positions. This ensures the implementation
correctly identifies the last occurrence of a character.
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
Acked-by: Andy Shevchenko <andy@kernel.org>
Tested-by: Joel Stanley <joel@jms.id.au>
---
lib/tests/string_kunit.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
index 991f01d52eb2..7f1e2bf6a352 100644
--- a/lib/tests/string_kunit.c
+++ b/lib/tests/string_kunit.c
@@ -184,6 +184,35 @@ static void string_test_strchr(struct kunit *test)
KUNIT_ASSERT_NULL(test, result);
}
+static void string_test_strrchr(struct kunit *test)
+{
+ const size_t buf_size = STRING_TEST_MAX_LEN + STRING_TEST_MAX_OFFSET + 1;
+ size_t offset, len;
+ char *buf;
+
+ buf = kunit_kzalloc(test, buf_size, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
+ memset(buf, 'A', buf_size);
+ buf[buf_size - 1] = '\0';
+
+ for (offset = 0; offset < STRING_TEST_MAX_OFFSET; offset++) {
+ for (len = 0; len <= STRING_TEST_MAX_LEN; len++) {
+ buf[offset + len] = '\0';
+
+ KUNIT_EXPECT_PTR_EQ(test, strrchr(buf + offset, 'Z'), NULL);
+
+ if (len > 0)
+ KUNIT_EXPECT_PTR_EQ(test, strrchr(buf + offset, 'A'),
+ buf + offset + len - 1);
+ else
+ KUNIT_EXPECT_PTR_EQ(test, strrchr(buf + offset, 'A'), NULL);
+
+ buf[offset + len] = 'A';
+ }
+ }
+}
+
static void string_test_strnchr(struct kunit *test)
{
const char *test_string = "abcdefghijkl";
@@ -679,6 +708,7 @@ static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_test_strnlen),
KUNIT_CASE(string_test_strchr),
KUNIT_CASE(string_test_strnchr),
+ KUNIT_CASE(string_test_strrchr),
KUNIT_CASE(string_test_strspn),
KUNIT_CASE(string_test_strcmp),
KUNIT_CASE(string_test_strcmp_long_strings),
--
2.25.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()
2026-01-27 1:25 [PATCH v5 0/8] riscv: optimize string functions and add kunit tests Feng Jiang
` (2 preceding siblings ...)
2026-01-27 1:25 ` [PATCH v5 3/8] lib/string_kunit: add correctness test for strrchr() Feng Jiang
@ 2026-01-27 1:25 ` Feng Jiang
2026-01-27 8:57 ` Andy Shevchenko
2026-01-27 1:25 ` [PATCH v5 5/8] lib/string_kunit: extend benchmarks to strnlen() and chr searches Feng Jiang
` (3 subsequent siblings)
7 siblings, 1 reply; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 1:25 UTC (permalink / raw)
To: pjw, palmer, aou, alex, akpm, kees, andy, jiangfeng, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan
Cc: linux-riscv, linux-kernel, linux-hardening
Introduce a benchmarking framework to the string_kunit test suite to
measure the execution efficiency of string functions.
The implementation is inspired by crc_benchmark(), measuring throughput
(MB/s) and latency (ns/call) across a range of string lengths. It
includes a warm-up phase, disables preemption during measurement, and
uses a fixed seed for reproducible results.
This framework allows for comparing different implementations (e.g.,
generic C vs. architecture-optimized assembly) within the KUnit
environment.
Initially, provide a benchmark for strlen().
Suggested-by: Andy Shevchenko <andy@kernel.org>
Suggested-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
---
lib/Kconfig.debug | 11 +++
lib/tests/string_kunit.c | 159 +++++++++++++++++++++++++++++++++++++++
2 files changed, 170 insertions(+)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba36939fda79..21b058ae815f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2475,6 +2475,17 @@ config STRING_HELPERS_KUNIT_TEST
depends on KUNIT
default KUNIT_ALL_TESTS
+config STRING_KUNIT_BENCH
+ bool "Benchmark string functions at runtime"
+ depends on STRING_KUNIT_TEST
+ help
+ Enable performance measurement for string functions.
+
+ This measures the execution efficiency of string functions
+ during the KUnit test run.
+
+ If unsure, say N.
+
config FFS_KUNIT_TEST
tristate "KUnit test ffs-family functions at runtime" if !KUNIT_ALL_TESTS
depends on KUNIT
diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
index 7f1e2bf6a352..3fcd55953bd7 100644
--- a/lib/tests/string_kunit.c
+++ b/lib/tests/string_kunit.c
@@ -6,10 +6,15 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <kunit/test.h>
+#include <linux/ktime.h>
+#include <linux/math64.h>
#include <linux/module.h>
+#include <linux/prandom.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/units.h>
#define STRCMP_LARGE_BUF_LEN 2048
#define STRCMP_CHANGE_POINT 1337
@@ -20,6 +25,9 @@
#define STRING_TEST_MAX_LEN 128
#define STRING_TEST_MAX_OFFSET 16
+#define STRING_BENCH_SEED 888
+#define STRING_BENCH_WORKLOAD (1 * MEGA)
+
static void string_test_memset16(struct kunit *test)
{
unsigned i, j, k;
@@ -700,6 +708,156 @@ static void string_test_strends(struct kunit *test)
KUNIT_EXPECT_TRUE(test, strends("", ""));
}
+#if IS_ENABLED(CONFIG_STRING_KUNIT_BENCH)
+/* Target string lengths for benchmarking */
+static const size_t bench_lens[] = {
+ 0, 1, 7, 8, 16, 31, 64, 127, 512, 1024, 3173, 4096,
+};
+
+/**
+ * alloc_max_bench_buffer() - Allocate buffer for the max test case.
+ * @test: KUnit context for managed allocation.
+ * @lens: Array of lengths used in the benchmark cases.
+ * @count: Number of elements in the @lens array.
+ * @buf_len: [out] Pointer to store the actually allocated buffer
+ * size (including NUL character).
+ *
+ * Return: Pointer to the allocated memory, or NULL on failure.
+ */
+static void *alloc_max_bench_buffer(struct kunit *test,
+ const size_t *lens, size_t count, size_t *buf_len)
+{
+ size_t max_len = 0;
+ void *buf;
+
+ for (size_t i = 0; i < count; i++)
+ max_len = max(lens[i], max_len);
+
+ /* Add space for NUL character */
+ max_len += 1;
+
+ buf = kunit_kzalloc(test, max_len, GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ if (buf_len)
+ *buf_len = max_len;
+
+ return buf;
+}
+
+/**
+ * fill_random_string() - Populate a buffer with a random NUL-terminated string.
+ * @buf: Buffer to fill.
+ * @len: Length of the buffer in bytes.
+ *
+ * Fills the buffer with random non-NUL bytes and ensures the string is
+ * properly NUL-terminated.
+ */
+static void fill_random_string(char *buf, size_t len)
+{
+ struct rnd_state state;
+
+ if (!buf || !len)
+ return;
+
+ /* Use a fixed seed to ensure deterministic benchmark results */
+ prandom_seed_state(&state, STRING_BENCH_SEED);
+ prandom_bytes_state(&state, buf, len);
+
+ /* Replace NUL characters to avoid early string termination */
+ for (size_t i = 0; i < len; i++) {
+ if (buf[i] == '\0')
+ buf[i] = 0x01;
+ }
+
+ buf[len - 1] = '\0';
+}
+
+/**
+ * STRING_BENCH() - Benchmark string functions.
+ * @iters: Number of iterations to run.
+ * @func: Function to benchmark.
+ * @...: Variable arguments passed to @func.
+ *
+ * Disables preemption and measures the total time in nanoseconds to execute
+ * @func(@__VA_ARGS__) for @iters times, including a small warm-up phase.
+ *
+ * Context: Disables preemption during measurement.
+ * Return: Total execution time in nanoseconds (u64).
+ */
+#define STRING_BENCH(iters, func, ...) \
+({ \
+ /* Volatile function pointer prevents dead code elimination */ \
+ typeof(func) (* volatile __func) = (func); \
+ size_t __bn_iters = (iters); \
+ size_t __bn_warm_iters; \
+ u64 __bn_t; \
+ \
+ __bn_warm_iters = max(__bn_iters / 10, 50U); \
+ \
+ for (size_t __bn_i = 0; __bn_i < __bn_warm_iters; __bn_i++) \
+ (void)__func(__VA_ARGS__); \
+ \
+ preempt_disable(); \
+ __bn_t = ktime_get_ns(); \
+ for (size_t __bn_i = 0; __bn_i < __bn_iters; __bn_i++) \
+ (void)__func(__VA_ARGS__); \
+ __bn_t = ktime_get_ns() - __bn_t; \
+ preempt_enable(); \
+ __bn_t; \
+})
+
+/**
+ * STRING_BENCH_BUF() - Benchmark harness for single-buffer functions.
+ * @test: KUnit context.
+ * @buf_name: Local char * variable name to be defined.
+ * @buf_size: Local size_t variable name to be defined.
+ * @func: Function to benchmark.
+ * @...: Extra arguments for @func.
+ *
+ * Prepares a randomized, NUL-terminated buffer and iterates through lengths
+ * in bench_lens, defining @buf_name and @buf_size in each loop.
+ */
+#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...) \
+do { \
+ size_t buf_size, _bn_i, _bn_iters, _bn_size = 0; \
+ u64 _bn_t, _bn_mbps = 0, _bn_lat = 0; \
+ char *buf_name, *_bn_buf; \
+ \
+ _bn_buf = alloc_max_bench_buffer(test, bench_lens, \
+ ARRAY_SIZE(bench_lens), &_bn_size); \
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf); \
+ \
+ fill_random_string(_bn_buf, _bn_size); \
+ \
+ for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) { \
+ buf_size = bench_lens[_bn_i]; \
+ buf_name = _bn_buf + _bn_size - buf_size - 1; \
+ _bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U); \
+ \
+ _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__); \
+ \
+ if (_bn_t > 0) { \
+ _bn_mbps = (u64)(buf_size) * _bn_iters \
+ * (NSEC_PER_SEC / MEGA); \
+ _bn_mbps = div64_u64(_bn_mbps, _bn_t); \
+ _bn_lat = div64_u64(_bn_t, _bn_iters); \
+ } \
+ kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
+ buf_size, _bn_mbps, _bn_lat); \
+ } \
+} while (0)
+#else
+#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...) \
+ kunit_skip(test, "not enabled")
+#endif /* IS_ENABLED(CONFIG_STRING_KUNIT_BENCH) */
+
+static void string_bench_strlen(struct kunit *test)
+{
+ STRING_BENCH_BUF(test, buf, len, strlen, buf);
+}
+
static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_test_memset16),
KUNIT_CASE(string_test_memset32),
@@ -725,6 +883,7 @@ static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_test_strtomem),
KUNIT_CASE(string_test_memtostr),
KUNIT_CASE(string_test_strends),
+ KUNIT_CASE(string_bench_strlen),
{}
};
--
2.25.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()
2026-01-27 1:25 ` [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen() Feng Jiang
@ 2026-01-27 8:57 ` Andy Shevchenko
2026-01-27 9:33 ` Feng Jiang
0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2026-01-27 8:57 UTC (permalink / raw)
To: Feng Jiang
Cc: pjw, palmer, aou, alex, akpm, kees, andy, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan, linux-riscv, linux-kernel,
linux-hardening
On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
> Introduce a benchmarking framework to the string_kunit test suite to
> measure the execution efficiency of string functions.
>
> The implementation is inspired by crc_benchmark(), measuring throughput
> (MB/s) and latency (ns/call) across a range of string lengths. It
> includes a warm-up phase, disables preemption during measurement, and
> uses a fixed seed for reproducible results.
>
> This framework allows for comparing different implementations (e.g.,
> generic C vs. architecture-optimized assembly) within the KUnit
> environment.
Acked-by: Andy Shevchenko <andy@kernel.org>
A few nit-picks below.
...
> +static void *alloc_max_bench_buffer(struct kunit *test,
> + const size_t *lens, size_t count, size_t *buf_len)
> +{
> + size_t max_len = 0;
> + void *buf;
> +
> + for (size_t i = 0; i < count; i++)
> + max_len = max(lens[i], max_len);
You also need minmax.h.
> + /* Add space for NUL character */
> + max_len += 1;
> +
> + buf = kunit_kzalloc(test, max_len, GFP_KERNEL);
> + if (!buf)
> + return NULL;
> +
> + if (buf_len)
> + *buf_len = max_len;
> +
> + return buf;
> +}
...
> +#define STRING_BENCH(iters, func, ...) \
> +({ \
> + /* Volatile function pointer prevents dead code elimination */ \
> + typeof(func) (* volatile __func) = (func); \
> + size_t __bn_iters = (iters); \
> + size_t __bn_warm_iters; \
> + u64 __bn_t; \
Perhaps a short comment here
/* Use 10% of the given iterations (maximum 50) to warm up */
> + __bn_warm_iters = max(__bn_iters / 10, 50U); \
> + \
> + for (size_t __bn_i = 0; __bn_i < __bn_warm_iters; __bn_i++) \
> + (void)__func(__VA_ARGS__); \
> + \
> + preempt_disable(); \
> + __bn_t = ktime_get_ns(); \
> + for (size_t __bn_i = 0; __bn_i < __bn_iters; __bn_i++) \
> + (void)__func(__VA_ARGS__); \
> + __bn_t = ktime_get_ns() - __bn_t; \
> + preempt_enable(); \
> + __bn_t; \
> +})
...
> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...) \
> +do { \
> + size_t buf_size, _bn_i, _bn_iters, _bn_size = 0; \
> + u64 _bn_t, _bn_mbps = 0, _bn_lat = 0; \
> + char *buf_name, *_bn_buf; \
> + \
> + _bn_buf = alloc_max_bench_buffer(test, bench_lens, \
> + ARRAY_SIZE(bench_lens), &_bn_size); \
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf); \
> + \
> + fill_random_string(_bn_buf, _bn_size); \
> + \
> + for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) { \
> + buf_size = bench_lens[_bn_i]; \
> + buf_name = _bn_buf + _bn_size - buf_size - 1; \
> + _bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U); \
> + \
> + _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__); \
> + \
Remove unneeded blank line.
> + if (_bn_t > 0) { \
> + _bn_mbps = (u64)(buf_size) * _bn_iters \
Why buf_size in the parentheses here and not anywhere else (above)?
I assume it's just an external temporary variable? But why do we need to have
it in the parameters to the macro?
> + * (NSEC_PER_SEC / MEGA); \
Leave '*' on the previous line.
> + _bn_mbps = div64_u64(_bn_mbps, _bn_t); \
> + _bn_lat = div64_u64(_bn_t, _bn_iters); \
> + } \
> + kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
> + buf_size, _bn_mbps, _bn_lat); \
> + } \
> +} while (0)
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()
2026-01-27 8:57 ` Andy Shevchenko
@ 2026-01-27 9:33 ` Feng Jiang
2026-01-27 9:50 ` Andy Shevchenko
0 siblings, 1 reply; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 9:33 UTC (permalink / raw)
To: Andy Shevchenko
Cc: pjw, palmer, aou, alex, akpm, kees, andy, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan, linux-riscv, linux-kernel,
linux-hardening
On 2026/1/27 16:57, Andy Shevchenko wrote:
> On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
>> Introduce a benchmarking framework to the string_kunit test suite to
>> measure the execution efficiency of string functions.
>>
>> The implementation is inspired by crc_benchmark(), measuring throughput
>> (MB/s) and latency (ns/call) across a range of string lengths. It
>> includes a warm-up phase, disables preemption during measurement, and
>> uses a fixed seed for reproducible results.
>>
>> This framework allows for comparing different implementations (e.g.,
>> generic C vs. architecture-optimized assembly) within the KUnit
>> environment.
>
> Acked-by: Andy Shevchenko <andy@kernel.org>
> A few nit-picks below.
>
Thanks for the Acked-by and the detailed review.
> ...
>
>> +static void *alloc_max_bench_buffer(struct kunit *test,
>> + const size_t *lens, size_t count, size_t *buf_len)
>> +{
>> + size_t max_len = 0;
>> + void *buf;
>> +
>> + for (size_t i = 0; i < count; i++)
>> + max_len = max(lens[i], max_len);
>
> You also need minmax.h.
Will add.
> ...
>
>> +#define STRING_BENCH(iters, func, ...) \
>> +({ \
>> + /* Volatile function pointer prevents dead code elimination */ \
>> + typeof(func) (* volatile __func) = (func); \
>> + size_t __bn_iters = (iters); \
>> + size_t __bn_warm_iters; \
>> + u64 __bn_t; \
>
>
> Perhaps a short comment here
>
> /* Use 10% of the given iterations (maximum 50) to warm up */
Will add this comment.
>> + __bn_warm_iters = max(__bn_iters / 10, 50U); \
>> + \
>> + for (size_t __bn_i = 0; __bn_i < __bn_warm_iters; __bn_i++) \
>> + (void)__func(__VA_ARGS__); \
>> + \
>> + preempt_disable(); \
>> + __bn_t = ktime_get_ns(); \
>> + for (size_t __bn_i = 0; __bn_i < __bn_iters; __bn_i++) \
>> + (void)__func(__VA_ARGS__); \
>> + __bn_t = ktime_get_ns() - __bn_t; \
>> + preempt_enable(); \
>> + __bn_t; \
>> +})
>
> ...
>
>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...) \
>> +do { \
>> + size_t buf_size, _bn_i, _bn_iters, _bn_size = 0; \
>> + u64 _bn_t, _bn_mbps = 0, _bn_lat = 0; \
>> + char *buf_name, *_bn_buf; \
>> + \
>> + _bn_buf = alloc_max_bench_buffer(test, bench_lens, \
>> + ARRAY_SIZE(bench_lens), &_bn_size); \
>> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf); \
>> + \
>> + fill_random_string(_bn_buf, _bn_size); \
>> + \
>> + for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) { \
>> + buf_size = bench_lens[_bn_i]; \
>> + buf_name = _bn_buf + _bn_size - buf_size - 1; \
>> + _bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U); \
>> + \
>> + _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__); \
>
>> + \
>
> Remove unneeded blank line.
Will fix.
>> + if (_bn_t > 0) { \
>> + _bn_mbps = (u64)(buf_size) * _bn_iters \
>
> Why buf_size in the parentheses here and not anywhere else (above)?
It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
> I assume it's just an external temporary variable? But why do we need to have
> it in the parameters to the macro?
This is necessary because buf_size often needs to be passed as an argument
to the function under test. For instance, when benchmarking strnlen, the
caller must pass the current length as an argument:
STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
>> + * (NSEC_PER_SEC / MEGA); \
> > Leave '*' on the previous line.
Will fix.
>> + _bn_mbps = div64_u64(_bn_mbps, _bn_t); \
>> + _bn_lat = div64_u64(_bn_t, _bn_iters); \
>> + } \
>> + kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
>> + buf_size, _bn_mbps, _bn_lat); \
>> + } \
>> +} while (0)
>
Thank you again for your patience and for helping me improve the quality of
this patch.
--
With Best Regards,
Feng Jiang
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()
2026-01-27 9:33 ` Feng Jiang
@ 2026-01-27 9:50 ` Andy Shevchenko
2026-01-28 1:44 ` Feng Jiang
0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2026-01-27 9:50 UTC (permalink / raw)
To: Feng Jiang
Cc: pjw, palmer, aou, alex, akpm, kees, andy, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan, linux-riscv, linux-kernel,
linux-hardening
On Tue, Jan 27, 2026 at 05:33:10PM +0800, Feng Jiang wrote:
> On 2026/1/27 16:57, Andy Shevchenko wrote:
> > On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
...
> >> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...) \
> >> +do { \
> >> + size_t buf_size, _bn_i, _bn_iters, _bn_size = 0; \
> >> + u64 _bn_t, _bn_mbps = 0, _bn_lat = 0; \
> >> + char *buf_name, *_bn_buf; \
> >> + \
> >> + _bn_buf = alloc_max_bench_buffer(test, bench_lens, \
> >> + ARRAY_SIZE(bench_lens), &_bn_size); \
> >> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf); \
> >> + \
> >> + fill_random_string(_bn_buf, _bn_size); \
> >> + \
> >> + for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) { \
> >> + buf_size = bench_lens[_bn_i]; \
> >> + buf_name = _bn_buf + _bn_size - buf_size - 1; \
> >> + _bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U); \
> >> + \
> >> + _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__); \
> >
> >> + \
> >
> > Remove unneeded blank line.
>
> Will fix.
>
> >> + if (_bn_t > 0) { \
> >> + _bn_mbps = (u64)(buf_size) * _bn_iters \
> >
> > Why buf_size in the parentheses here and not anywhere else (above)?
>
> It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
>
> > I assume it's just an external temporary variable? But why do we need to have
> > it in the parameters to the macro?
>
> This is necessary because buf_size often needs to be passed as an argument
> to the function under test. For instance, when benchmarking strnlen, the
> caller must pass the current length as an argument:
> STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
Okay, and why is it needed in this macro? It get overridden immediately in
the loop. Assuming that the array size of bench lengths is not zero, this
has no visible use. Can you elaborate?
> >> + * (NSEC_PER_SEC / MEGA); \
> > > Leave '*' on the previous line.
>
> Will fix.
>
> >> + _bn_mbps = div64_u64(_bn_mbps, _bn_t); \
> >> + _bn_lat = div64_u64(_bn_t, _bn_iters); \
> >> + } \
> >> + kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
> >> + buf_size, _bn_mbps, _bn_lat); \
> >> + } \
> >> +} while (0)
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()
2026-01-27 9:50 ` Andy Shevchenko
@ 2026-01-28 1:44 ` Feng Jiang
2026-01-28 8:59 ` Andy Shevchenko
0 siblings, 1 reply; 17+ messages in thread
From: Feng Jiang @ 2026-01-28 1:44 UTC (permalink / raw)
To: Andy Shevchenko
Cc: pjw, palmer, aou, alex, akpm, kees, andy, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan, linux-riscv, linux-kernel,
linux-hardening
On 2026/1/27 17:50, Andy Shevchenko wrote:
> On Tue, Jan 27, 2026 at 05:33:10PM +0800, Feng Jiang wrote:
>> On 2026/1/27 16:57, Andy Shevchenko wrote:
>>> On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
>
> ...
>
>>>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...) \
>>>> +do { \
>>>> + size_t buf_size, _bn_i, _bn_iters, _bn_size = 0; \
>>>> + u64 _bn_t, _bn_mbps = 0, _bn_lat = 0; \
>>>> + char *buf_name, *_bn_buf; \
>>>> + \
>>>> + _bn_buf = alloc_max_bench_buffer(test, bench_lens, \
>>>> + ARRAY_SIZE(bench_lens), &_bn_size); \
>>>> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf); \
>>>> + \
>>>> + fill_random_string(_bn_buf, _bn_size); \
>>>> + \
>>>> + for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) { \
>>>> + buf_size = bench_lens[_bn_i]; \
>>>> + buf_name = _bn_buf + _bn_size - buf_size - 1; \
>>>> + _bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U); \
>>>> + \
>>>> + _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__); \
>>>
>>>> + \
>>>
>>> Remove unneeded blank line.
>>
>> Will fix.
>>
>>>> + if (_bn_t > 0) { \
>>>> + _bn_mbps = (u64)(buf_size) * _bn_iters \
>>>
>>> Why buf_size in the parentheses here and not anywhere else (above)?
>>
>> It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
>>
>>> I assume it's just an external temporary variable? But why do we need to have
>>> it in the parameters to the macro?
>>
>> This is necessary because buf_size often needs to be passed as an argument
>> to the function under test. For instance, when benchmarking strnlen, the
>> caller must pass the current length as an argument:
>> STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
>
> Okay, and why is it needed in this macro? It get overridden immediately in
> the loop. Assuming that the array size of bench lengths is not zero, this
> has no visible use. Can you elaborate?
Thank you for the explanation. I see the source of the confusion now.
In v5, buf_name and buf_size were not intended to pass external variables into
the macro. Instead, they were naming placeholders for local variables declared
inside the macro's scope. This allows the caller to define the names used in
the variadic arguments.
To resolve the logical inconsistency you pointed out, I'd like to propose two
options for v6. Which one would you prefer?
Option 1: Internal Declaration (The "Self-Contained" approach)
We declare and initialize the variables directly inside the loop. This keeps
the macro self-contained and the caller doesn't need to pre-declare anything.
for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
size_t buf_size = bench_lens[_bn_i];
char *buf_name = _bn_buf + _bn_size - buf_size - 1;
...
}
Usage:
STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
Option 2: External Declaration (The list.h approach)
The macro expects the caller to provide pre-declared variables, similar to
list_for_each_entry(). This removes all re-declarations inside the macro.
for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
buf_size = bench_lens[_bn_i];
buf_name = _bn_buf + _bn_size - buf_size - 1;
...
}
Usage:
size_t my_len;
char *my_buf;
STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
Please let me know which style fits the kernel's preference better, and
I will implement it in v6 along with your other suggestions.
Thanks for the catch!
>
>>>> + * (NSEC_PER_SEC / MEGA); \
>>>> Leave '*' on the previous line.
>>
>> Will fix.
>>
>>>> + _bn_mbps = div64_u64(_bn_mbps, _bn_t); \
>>>> + _bn_lat = div64_u64(_bn_t, _bn_iters); \
>>>> + } \
>>>> + kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
>>>> + buf_size, _bn_mbps, _bn_lat); \
>>>> + } \
>>>> +} while (0)
>
--
With Best Regards,
Feng Jiang
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()
2026-01-28 1:44 ` Feng Jiang
@ 2026-01-28 8:59 ` Andy Shevchenko
2026-01-28 9:20 ` Feng Jiang
0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2026-01-28 8:59 UTC (permalink / raw)
To: Feng Jiang
Cc: pjw, palmer, aou, alex, akpm, kees, andy, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan, linux-riscv, linux-kernel,
linux-hardening
On Wed, Jan 28, 2026 at 09:44:40AM +0800, Feng Jiang wrote:
> On 2026/1/27 17:50, Andy Shevchenko wrote:
> > On Tue, Jan 27, 2026 at 05:33:10PM +0800, Feng Jiang wrote:
> >> On 2026/1/27 16:57, Andy Shevchenko wrote:
> >>> On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
...
> >>>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...) \
> >>>> +do { \
> >>>> + size_t buf_size, _bn_i, _bn_iters, _bn_size = 0; \
> >>>> + u64 _bn_t, _bn_mbps = 0, _bn_lat = 0; \
> >>>> + char *buf_name, *_bn_buf; \
> >>>> + \
> >>>> + _bn_buf = alloc_max_bench_buffer(test, bench_lens, \
> >>>> + ARRAY_SIZE(bench_lens), &_bn_size); \
> >>>> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf); \
> >>>> + \
> >>>> + fill_random_string(_bn_buf, _bn_size); \
> >>>> + \
> >>>> + for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) { \
> >>>> + buf_size = bench_lens[_bn_i]; \
> >>>> + buf_name = _bn_buf + _bn_size - buf_size - 1; \
> >>>> + _bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U); \
> >>>> + \
> >>>> + _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__); \
> >>>
> >>>> + \
> >>>
> >>> Remove unneeded blank line.
> >>
> >> Will fix.
> >>
> >>>> + if (_bn_t > 0) { \
> >>>> + _bn_mbps = (u64)(buf_size) * _bn_iters \
> >>>
> >>> Why buf_size in the parentheses here and not anywhere else (above)?
> >>
> >> It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
> >>
> >>> I assume it's just an external temporary variable? But why do we need to have
> >>> it in the parameters to the macro?
> >>
> >> This is necessary because buf_size often needs to be passed as an argument
> >> to the function under test. For instance, when benchmarking strnlen, the
> >> caller must pass the current length as an argument:
> >> STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
> >
> > Okay, and why is it needed in this macro? It get overridden immediately in
> > the loop. Assuming that the array size of bench lengths is not zero, this
> > has no visible use. Can you elaborate?
>
> Thank you for the explanation. I see the source of the confusion now.
>
> In v5, buf_name and buf_size were not intended to pass external variables into
> the macro. Instead, they were naming placeholders for local variables declared
> inside the macro's scope. This allows the caller to define the names used in
> the variadic arguments.
>
> To resolve the logical inconsistency you pointed out, I'd like to propose two
> options for v6. Which one would you prefer?
>
> Option 1: Internal Declaration (The "Self-Contained" approach)
>
> We declare and initialize the variables directly inside the loop. This keeps
> the macro self-contained and the caller doesn't need to pre-declare anything.
>
> for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
> size_t buf_size = bench_lens[_bn_i];
> char *buf_name = _bn_buf + _bn_size - buf_size - 1;
> ...
> }
>
> Usage:
> STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
This option is better as long as the user doesn't need to know the (stale) data
out of these parameters. And I think this is the case, so #1 is the winner.
> Option 2: External Declaration (The list.h approach)
>
> The macro expects the caller to provide pre-declared variables, similar to
> list_for_each_entry(). This removes all re-declarations inside the macro.
>
> for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
> buf_size = bench_lens[_bn_i];
> buf_name = _bn_buf + _bn_size - buf_size - 1;
> ...
> }
>
> Usage:
> size_t my_len;
> char *my_buf;
> STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
>
> Please let me know which style fits the kernel's preference better, and
> I will implement it in v6 along with your other suggestions.
>
> Thanks for the catch!
>
> >>>> + * (NSEC_PER_SEC / MEGA); \
> >>>> Leave '*' on the previous line.
> >>
> >> Will fix.
> >>
> >>>> + _bn_mbps = div64_u64(_bn_mbps, _bn_t); \
> >>>> + _bn_lat = div64_u64(_bn_t, _bn_iters); \
> >>>> + } \
> >>>> + kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
> >>>> + buf_size, _bn_mbps, _bn_lat); \
> >>>> + } \
> >>>> +} while (0)
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()
2026-01-28 8:59 ` Andy Shevchenko
@ 2026-01-28 9:20 ` Feng Jiang
0 siblings, 0 replies; 17+ messages in thread
From: Feng Jiang @ 2026-01-28 9:20 UTC (permalink / raw)
To: Andy Shevchenko
Cc: pjw, palmer, aou, alex, akpm, kees, andy, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan, linux-riscv, linux-kernel,
linux-hardening
On 2026/1/28 16:59, Andy Shevchenko wrote:
> On Wed, Jan 28, 2026 at 09:44:40AM +0800, Feng Jiang wrote:
>> On 2026/1/27 17:50, Andy Shevchenko wrote:
>>> On Tue, Jan 27, 2026 at 05:33:10PM +0800, Feng Jiang wrote:
>>>> On 2026/1/27 16:57, Andy Shevchenko wrote:
>>>>> On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
>
> ...
>
>>>>>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...) \
>>>>>> +do { \
>>>>>> + size_t buf_size, _bn_i, _bn_iters, _bn_size = 0; \
>>>>>> + u64 _bn_t, _bn_mbps = 0, _bn_lat = 0; \
>>>>>> + char *buf_name, *_bn_buf; \
>>>>>> + \
>>>>>> + _bn_buf = alloc_max_bench_buffer(test, bench_lens, \
>>>>>> + ARRAY_SIZE(bench_lens), &_bn_size); \
>>>>>> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf); \
>>>>>> + \
>>>>>> + fill_random_string(_bn_buf, _bn_size); \
>>>>>> + \
>>>>>> + for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) { \
>>>>>> + buf_size = bench_lens[_bn_i]; \
>>>>>> + buf_name = _bn_buf + _bn_size - buf_size - 1; \
>>>>>> + _bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U); \
>>>>>> + \
>>>>>> + _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__); \
>>>>>
>>>>>> + \
>>>>>
>>>>> Remove unneeded blank line.
>>>>
>>>> Will fix.
>>>>
>>>>>> + if (_bn_t > 0) { \
>>>>>> + _bn_mbps = (u64)(buf_size) * _bn_iters \
>>>>>
>>>>> Why buf_size in the parentheses here and not anywhere else (above)?
>>>>
>>>> It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
>>>>
>>>>> I assume it's just an external temporary variable? But why do we need to have
>>>>> it in the parameters to the macro?
>>>>
>>>> This is necessary because buf_size often needs to be passed as an argument
>>>> to the function under test. For instance, when benchmarking strnlen, the
>>>> caller must pass the current length as an argument:
>>>> STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
>>>
>>> Okay, and why is it needed in this macro? It get overridden immediately in
>>> the loop. Assuming that the array size of bench lengths is not zero, this
>>> has no visible use. Can you elaborate?
>>
>> Thank you for the explanation. I see the source of the confusion now.
>>
>> In v5, buf_name and buf_size were not intended to pass external variables into
>> the macro. Instead, they were naming placeholders for local variables declared
>> inside the macro's scope. This allows the caller to define the names used in
>> the variadic arguments.
>>
>> To resolve the logical inconsistency you pointed out, I'd like to propose two
>> options for v6. Which one would you prefer?
>>
>> Option 1: Internal Declaration (The "Self-Contained" approach)
>>
>> We declare and initialize the variables directly inside the loop. This keeps
>> the macro self-contained and the caller doesn't need to pre-declare anything.
>>
>> for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
>> size_t buf_size = bench_lens[_bn_i];
>> char *buf_name = _bn_buf + _bn_size - buf_size - 1;
>> ...
>> }
>>
>> Usage:
>> STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
>
> This option is better as long as the user doesn't need to know the (stale) data
> out of these parameters. And I think this is the case, so #1 is the winner.
Thanks for the feedback. I'll incorporate this change, along with the other
improvements we discussed, and send out v6 shortly.
>> Option 2: External Declaration (The list.h approach)
>>
>> The macro expects the caller to provide pre-declared variables, similar to
>> list_for_each_entry(). This removes all re-declarations inside the macro.
>>
>> for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
>> buf_size = bench_lens[_bn_i];
>> buf_name = _bn_buf + _bn_size - buf_size - 1;
>> ...
>> }
>>
>> Usage:
>> size_t my_len;
>> char *my_buf;
>> STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
>>
>> Please let me know which style fits the kernel's preference better, and
>> I will implement it in v6 along with your other suggestions.
>>
>> Thanks for the catch!
>>
>>>>>> + * (NSEC_PER_SEC / MEGA); \
>>>>>> Leave '*' on the previous line.
>>>>
>>>> Will fix.
>>>>
>>>>>> + _bn_mbps = div64_u64(_bn_mbps, _bn_t); \
>>>>>> + _bn_lat = div64_u64(_bn_t, _bn_iters); \
>>>>>> + } \
>>>>>> + kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
>>>>>> + buf_size, _bn_mbps, _bn_lat); \
>>>>>> + } \
>>>>>> +} while (0)
>
--
With Best Regards,
Feng Jiang
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH v5 5/8] lib/string_kunit: extend benchmarks to strnlen() and chr searches
2026-01-27 1:25 [PATCH v5 0/8] riscv: optimize string functions and add kunit tests Feng Jiang
` (3 preceding siblings ...)
2026-01-27 1:25 ` [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen() Feng Jiang
@ 2026-01-27 1:25 ` Feng Jiang
2026-01-27 1:25 ` [PATCH v5 6/8] riscv: lib: add strnlen() implementation Feng Jiang
` (2 subsequent siblings)
7 siblings, 0 replies; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 1:25 UTC (permalink / raw)
To: pjw, palmer, aou, alex, akpm, kees, andy, jiangfeng, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan
Cc: linux-riscv, linux-kernel, linux-hardening
Extend the string benchmarking suite to include strnlen(), strchr(),
and strrchr().
For character search functions strchr() and strrchr(), the benchmark
targets the NUL character. This ensures the entire string is scanned,
providing a consistent measure of full-length processing efficiency
comparable to strlen().
Suggested-by: Andy Shevchenko <andy@kernel.org>
Suggested-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
Acked-by: Andy Shevchenko <andy@kernel.org>
---
lib/tests/string_kunit.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
index 3fcd55953bd7..c338d35d3ea3 100644
--- a/lib/tests/string_kunit.c
+++ b/lib/tests/string_kunit.c
@@ -858,6 +858,21 @@ static void string_bench_strlen(struct kunit *test)
STRING_BENCH_BUF(test, buf, len, strlen, buf);
}
+static void string_bench_strnlen(struct kunit *test)
+{
+ STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
+}
+
+static void string_bench_strchr(struct kunit *test)
+{
+ STRING_BENCH_BUF(test, buf, len, strchr, buf, '\0');
+}
+
+static void string_bench_strrchr(struct kunit *test)
+{
+ STRING_BENCH_BUF(test, buf, len, strrchr, buf, '\0');
+}
+
static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_test_memset16),
KUNIT_CASE(string_test_memset32),
@@ -884,6 +899,9 @@ static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_test_memtostr),
KUNIT_CASE(string_test_strends),
KUNIT_CASE(string_bench_strlen),
+ KUNIT_CASE(string_bench_strnlen),
+ KUNIT_CASE(string_bench_strchr),
+ KUNIT_CASE(string_bench_strrchr),
{}
};
--
2.25.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 6/8] riscv: lib: add strnlen() implementation
2026-01-27 1:25 [PATCH v5 0/8] riscv: optimize string functions and add kunit tests Feng Jiang
` (4 preceding siblings ...)
2026-01-27 1:25 ` [PATCH v5 5/8] lib/string_kunit: extend benchmarks to strnlen() and chr searches Feng Jiang
@ 2026-01-27 1:25 ` Feng Jiang
2026-01-27 1:25 ` [PATCH v5 7/8] riscv: lib: add strchr() implementation Feng Jiang
2026-01-27 1:25 ` [PATCH v5 8/8] riscv: lib: add strrchr() implementation Feng Jiang
7 siblings, 0 replies; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 1:25 UTC (permalink / raw)
To: pjw, palmer, aou, alex, akpm, kees, andy, jiangfeng, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan
Cc: linux-riscv, linux-kernel, linux-hardening, Qingfang Deng
Add an optimized strnlen() implementation for RISC-V. This version
includes a generic optimization and a Zbb-powered optimization using
the 'orc.b' instruction, derived from the strlen() implementation.
Benchmark results (QEMU TCG, rv64):
Length | Original (MB/s) | Optimized (MB/s) | Improvement
-------|-----------------|------------------|------------
16 B | 179 | 309 | +72.6%
512 B | 347 | 1562 | +350.1%
4096 B | 356 | 1878 | +427.5%
Suggested-by: Qingfang Deng <dqfext@gmail.com>
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
---
arch/riscv/include/asm/string.h | 3 +
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/strnlen.S | 164 ++++++++++++++++++++++++++++++++
arch/riscv/purgatory/Makefile | 5 +-
4 files changed, 172 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/lib/strnlen.S
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 5ba77f60bf0b..16634d67c217 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -28,6 +28,9 @@ extern asmlinkage __kernel_size_t strlen(const char *);
#define __HAVE_ARCH_STRNCMP
extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count);
+
+#define __HAVE_ARCH_STRNLEN
+extern asmlinkage __kernel_size_t strnlen(const char *, size_t);
#endif
/* For those files which don't want to check by kasan. */
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index bbc031124974..0969d8136df0 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -7,6 +7,7 @@ ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
lib-y += strcmp.o
lib-y += strlen.o
lib-y += strncmp.o
+lib-y += strnlen.o
endif
lib-y += csum.o
ifeq ($(CONFIG_MMU), y)
diff --git a/arch/riscv/lib/strnlen.S b/arch/riscv/lib/strnlen.S
new file mode 100644
index 000000000000..53afa7b5b314
--- /dev/null
+++ b/arch/riscv/lib/strnlen.S
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Base on arch/riscv/lib/strlen.S
+ *
+ * Copyright (C) Feng Jiang <jiangfeng@kylinos.cn>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+/* size_t strnlen(const char *s, size_t count) */
+SYM_FUNC_START(strnlen)
+
+ __ALTERNATIVE_CFG("nop", "j strnlen_zbb", 0, RISCV_ISA_EXT_ZBB,
+ IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
+
+
+ /*
+ * Returns
+ * a0 - String length
+ *
+ * Parameters
+ * a0 - String to measure
+ * a1 - Max length of string
+ *
+ * Clobbers
+ * t0, t1, t2
+ */
+ addi t1, a0, -1
+ add t2, a0, a1
+1:
+ addi t1, t1, 1
+ beq t1, t2, 2f
+ lbu t0, 0(t1)
+ bnez t0, 1b
+2:
+ sub a0, t1, a0
+ ret
+
+
+/*
+ * Variant of strnlen using the ZBB extension if available
+ */
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+strnlen_zbb:
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+# define CZ clz
+# define SHIFT sll
+#else
+# define CZ ctz
+# define SHIFT srl
+#endif
+
+.option push
+.option arch,+zbb
+
+ /*
+ * Returns
+ * a0 - String length
+ *
+ * Parameters
+ * a0 - String to measure
+ * a1 - Max length of string
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4
+ */
+
+ /* If maxlen is 0, return 0. */
+ beqz a1, 3f
+
+ /* Number of irrelevant bytes in the first word. */
+ andi t2, a0, SZREG-1
+
+ /* Align pointer. */
+ andi t0, a0, -SZREG
+
+ li t3, SZREG
+ sub t3, t3, t2
+ slli t2, t2, 3
+
+ /* Aligned boundary. */
+ add t4, a0, a1
+ andi t4, t4, -SZREG
+
+ /* Get the first word. */
+ REG_L t1, 0(t0)
+
+ /*
+ * Shift away the partial data we loaded to remove the irrelevant bytes
+ * preceding the string with the effect of adding NUL bytes at the
+ * end of the string's first word.
+ */
+ SHIFT t1, t1, t2
+
+ /* Convert non-NUL into 0xff and NUL into 0x00. */
+ orc.b t1, t1
+
+ /* Convert non-NUL into 0x00 and NUL into 0xff. */
+ not t1, t1
+
+ /*
+ * Search for the first set bit (corresponding to a NUL byte in the
+ * original chunk).
+ */
+ CZ t1, t1
+
+ /*
+ * The first chunk is special: compare against the number
+ * of valid bytes in this chunk.
+ */
+ srli a0, t1, 3
+
+ /* Limit the result by maxlen. */
+ minu a0, a0, a1
+
+ bgtu t3, a0, 2f
+
+ /* Prepare for the word comparison loop. */
+ addi t2, t0, SZREG
+ li t3, -1
+
+ /*
+ * Our critical loop is 4 instructions and processes data in
+ * 4 byte or 8 byte chunks.
+ */
+ .p2align 3
+1:
+ REG_L t1, SZREG(t0)
+ addi t0, t0, SZREG
+ orc.b t1, t1
+ bgeu t0, t4, 4f
+ beq t1, t3, 1b
+4:
+ not t1, t1
+ CZ t1, t1
+ srli t1, t1, 3
+
+ /* Get number of processed bytes. */
+ sub t2, t0, t2
+
+ /* Add number of characters in the first word. */
+ add a0, a0, t2
+
+ /* Add number of characters in the last word. */
+ add a0, a0, t1
+
+ /* Ensure the final result does not exceed maxlen. */
+ minu a0, a0, a1
+2:
+ ret
+3:
+ mv a0, a1
+ ret
+
+.option pop
+#endif
+SYM_FUNC_END(strnlen)
+SYM_FUNC_ALIAS(__pi_strnlen, strnlen)
+EXPORT_SYMBOL(strnlen)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index 530e497ca2f9..d7c0533108be 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -2,7 +2,7 @@
purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
-purgatory-y += strcmp.o strlen.o strncmp.o
+purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o
endif
targets += $(purgatory-y)
@@ -32,6 +32,9 @@ $(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
+$(obj)/strnlen.o: $(srctree)/arch/riscv/lib/strnlen.S FORCE
+ $(call if_changed_rule,as_o_S)
+
CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS
--
2.25.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 7/8] riscv: lib: add strchr() implementation
2026-01-27 1:25 [PATCH v5 0/8] riscv: optimize string functions and add kunit tests Feng Jiang
` (5 preceding siblings ...)
2026-01-27 1:25 ` [PATCH v5 6/8] riscv: lib: add strnlen() implementation Feng Jiang
@ 2026-01-27 1:25 ` Feng Jiang
2026-01-27 1:25 ` [PATCH v5 8/8] riscv: lib: add strrchr() implementation Feng Jiang
7 siblings, 0 replies; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 1:25 UTC (permalink / raw)
To: pjw, palmer, aou, alex, akpm, kees, andy, jiangfeng, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan
Cc: linux-riscv, linux-kernel, linux-hardening
Add an assembly implementation of strchr() for RISC-V.
By eliminating stack frame management (prologue/epilogue) and optimizing
the function entries, the assembly version provides significant relative
gains for short strings where the fixed overhead of the C function is
most prominent. As string length increases, performance converges with
the generic C implementation.
Benchmark results (QEMU TCG, rv64):
Length | Original (MB/s) | Optimized (MB/s) | Improvement
-------|-----------------|------------------|------------
1 B | 21 | 22 | +4.8%
7 B | 113 | 121 | +7.1%
16 B | 195 | 202 | +3.6%
512 B | 376 | 389 | +3.5%
4096 B | 394 | 393 | -0.3%
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
---
arch/riscv/include/asm/string.h | 3 +++
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/strchr.S | 35 +++++++++++++++++++++++++++++++++
arch/riscv/purgatory/Makefile | 5 ++++-
4 files changed, 43 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/lib/strchr.S
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 16634d67c217..ca3ade82b124 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -31,6 +31,9 @@ extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count);
#define __HAVE_ARCH_STRNLEN
extern asmlinkage __kernel_size_t strnlen(const char *, size_t);
+
+#define __HAVE_ARCH_STRCHR
+extern asmlinkage char *strchr(const char *, int);
#endif
/* For those files which don't want to check by kasan. */
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 0969d8136df0..b7f804dce1c3 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -8,6 +8,7 @@ lib-y += strcmp.o
lib-y += strlen.o
lib-y += strncmp.o
lib-y += strnlen.o
+lib-y += strchr.o
endif
lib-y += csum.o
ifeq ($(CONFIG_MMU), y)
diff --git a/arch/riscv/lib/strchr.S b/arch/riscv/lib/strchr.S
new file mode 100644
index 000000000000..48c3a9da53e3
--- /dev/null
+++ b/arch/riscv/lib/strchr.S
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Copyright (C) 2025 Feng Jiang <jiangfeng@kylinos.cn>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+/* char *strchr(const char *s, int c) */
+SYM_FUNC_START(strchr)
+ /*
+ * Parameters
+ * a0 - The string to be searched
+ * a1 - The character to search for
+ *
+ * Returns
+ * a0 - Address of first occurrence of 'c' or 0
+ *
+ * Clobbers
+ * t0
+ */
+ andi a1, a1, 0xff
+1:
+ lbu t0, 0(a0)
+ beq t0, a1, 2f
+ addi a0, a0, 1
+ bnez t0, 1b
+ li a0, 0
+2:
+ ret
+SYM_FUNC_END(strchr)
+
+SYM_FUNC_ALIAS_WEAK(__pi_strchr, strchr)
+EXPORT_SYMBOL(strchr)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index d7c0533108be..e7b3d748c913 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -2,7 +2,7 @@
purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
-purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o
+purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o
endif
targets += $(purgatory-y)
@@ -35,6 +35,9 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(obj)/strnlen.o: $(srctree)/arch/riscv/lib/strnlen.S FORCE
$(call if_changed_rule,as_o_S)
+$(obj)/strchr.o: $(srctree)/arch/riscv/lib/strchr.S FORCE
+ $(call if_changed_rule,as_o_S)
+
CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS
--
2.25.1
^ permalink raw reply related [flat|nested] 17+ messages in thread* [PATCH v5 8/8] riscv: lib: add strrchr() implementation
2026-01-27 1:25 [PATCH v5 0/8] riscv: optimize string functions and add kunit tests Feng Jiang
` (6 preceding siblings ...)
2026-01-27 1:25 ` [PATCH v5 7/8] riscv: lib: add strchr() implementation Feng Jiang
@ 2026-01-27 1:25 ` Feng Jiang
7 siblings, 0 replies; 17+ messages in thread
From: Feng Jiang @ 2026-01-27 1:25 UTC (permalink / raw)
To: pjw, palmer, aou, alex, akpm, kees, andy, jiangfeng, ebiggers,
martin.petersen, sohil.mehta, charlie, conor.dooley,
samuel.holland, linus.walleij, nathan
Cc: linux-riscv, linux-kernel, linux-hardening
Add an assembly implementation of strrchr() for RISC-V.
This implementation minimizes instruction count and avoids unnecessary
memory access to the stack. The performance benefits are most visible
on small workloads (1-16 bytes) where the architectural savings in
function overhead outweigh the execution time of the scan loop.
Benchmark results (QEMU TCG, rv64):
Length | Original (MB/s) | Optimized (MB/s) | Improvement
-------|-----------------|------------------|------------
1 B | 20 | 21 | +5.0%
7 B | 111 | 120 | +8.1%
16 B | 189 | 199 | +5.3%
512 B | 361 | 382 | +5.8%
4096 B | 388 | 391 | +0.8%
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
---
arch/riscv/include/asm/string.h | 3 +++
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/strrchr.S | 37 +++++++++++++++++++++++++++++++++
arch/riscv/purgatory/Makefile | 5 ++++-
4 files changed, 45 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/lib/strrchr.S
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index ca3ade82b124..764ffe8f6479 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -34,6 +34,9 @@ extern asmlinkage __kernel_size_t strnlen(const char *, size_t);
#define __HAVE_ARCH_STRCHR
extern asmlinkage char *strchr(const char *, int);
+
+#define __HAVE_ARCH_STRRCHR
+extern asmlinkage char *strrchr(const char *, int);
#endif
/* For those files which don't want to check by kasan. */
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index b7f804dce1c3..735d0b665536 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -9,6 +9,7 @@ lib-y += strlen.o
lib-y += strncmp.o
lib-y += strnlen.o
lib-y += strchr.o
+lib-y += strrchr.o
endif
lib-y += csum.o
ifeq ($(CONFIG_MMU), y)
diff --git a/arch/riscv/lib/strrchr.S b/arch/riscv/lib/strrchr.S
new file mode 100644
index 000000000000..ac58b20ca21d
--- /dev/null
+++ b/arch/riscv/lib/strrchr.S
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Copyright (C) 2025 Feng Jiang <jiangfeng@kylinos.cn>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+/* char *strrchr(const char *s, int c) */
+SYM_FUNC_START(strrchr)
+ /*
+ * Parameters
+ * a0 - The string to be searched
+ * a1 - The character to seaerch for
+ *
+ * Returns
+ * a0 - Address of last occurrence of 'c' or 0
+ *
+ * Clobbers
+ * t0, t1
+ */
+ andi a1, a1, 0xff
+ mv t1, a0
+ li a0, 0
+1:
+ lbu t0, 0(t1)
+ bne t0, a1, 2f
+ mv a0, t1
+2:
+ addi t1, t1, 1
+ bnez t0, 1b
+ ret
+SYM_FUNC_END(strrchr)
+
+SYM_FUNC_ALIAS_WEAK(__pi_strrchr, strrchr)
+EXPORT_SYMBOL(strrchr)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index e7b3d748c913..b0358a78f11a 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -2,7 +2,7 @@
purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
-purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o
+purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o
endif
targets += $(purgatory-y)
@@ -38,6 +38,9 @@ $(obj)/strnlen.o: $(srctree)/arch/riscv/lib/strnlen.S FORCE
$(obj)/strchr.o: $(srctree)/arch/riscv/lib/strchr.S FORCE
$(call if_changed_rule,as_o_S)
+$(obj)/strrchr.o: $(srctree)/arch/riscv/lib/strrchr.S FORCE
+ $(call if_changed_rule,as_o_S)
+
CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS
--
2.25.1
^ permalink raw reply related [flat|nested] 17+ messages in thread