* [PATCH v2 1/2] riscv: lib: add memcmp() implementation
2026-05-14 12:13 [PATCH v2 0/2] riscv: lib: add optimized memcmp() and extend KUnit tests Milan Tripkovic
@ 2026-05-14 12:13 ` Milan Tripkovic
2026-05-14 12:13 ` [PATCH v2 2/2] lib/string_kunit: extend benchmarks and unit test to memcmp() Milan Tripkovic
1 sibling, 0 replies; 4+ messages in thread
From: Milan Tripkovic @ 2026-05-14 12:13 UTC (permalink / raw)
To: Paul Walmsley, Palmer Dabbelt, Albert Ou
Cc: Alexandre Ghiti, Kees Cook, Andy Shevchenko, Nathan Chancellor,
Nick Desaulniers, Bill Wendling, Justin Stitt, linux-riscv,
linux-kernel, linux-hardening, llvm, Dusan Stojkovic,
Milan Tripkovic
From: Milan Tripkovic <Milan.Tripkovic@rt-rk.com>
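Add an assembly implementation of memcmp() for RISC-V. When the Zbb
extension is available and both buffers are register-width aligned, the
implementation compares one word at a time (using rev8 to put the bytes in
memory order); otherwise, and on non-Zbb systems, it falls back to a
byte-wise assembly loop.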
Benchmark results (QEMU TCG, rv64, aligned; throughput in MB/s, higher is better):
Len | Default | NoZBB | ZBB | %NoZBB | %ZBB
------|---------|--------|--------|--------|-------
1 B | 20.3 | 25.0 | 20.9 | +23.2% | +3.0%
7 B | 88.9 | 107.5 | 155.7 | +20.9% | +75.1%
8 B | 89.6 | 110.9 | 176.2 | +23.8% | +96.7%
16 B | 134.4 | 172.4 | 334.8 | +28.3% | +149.1%
31 B | 163.5 | 220.5 | 606.2 | +34.9% | +270.8%
64 B | 203.8 | 235.9 | 968.6 | +15.8% | +375.3%
127 B | 224.6 | 268.7 | 1362.8 | +19.6% | +506.8%
512 B | 235.7 | 271.1 | 1913.7 | +15.0% | +711.9%
1024 B| 256.8 | 290.6 | 2123.6 | +13.2% | +726.9%
4096 B| 263.8 | 302.9 | 2290.4 | +14.8% | +768.2%
Benchmark results (QEMU TCG, rv64, unaligned - offset 3; throughput in MB/s, higher is better):
Len | Default | NoZBB | ZBB | %NoZBB | %ZBB
------|---------|--------|--------|--------|-------
1 B | 20.7 | 21.7 | 21.5 | +4.8% | +3.9%
7 B | 96.2 | 99.1 | 96.9 | +3.0% | +0.7%
8 B | 97.5 | 118.5 | 110.5 | +21.5% | +13.3%
16 B | 136.7 | 166.6 | 172.8 | +21.9% | +26.4%
31 B | 167.6 | 206.5 | 211.9 | +23.2% | +26.4%
64 B | 204.4 | 229.9 | 240.3 | +12.5% | +17.6%
127 B | 229.6 | 261.7 | 269.0 | +14.0% | +17.2%
512 B | 245.5 | 260.8 | 269.9 | +6.2% | +9.9%
1024 B| 246.9 | 261.2 | 283.5 | +5.8% | +14.8%
4096 B| 250.7 | 295.8 | 299.7 | +18.0% | +19.5%
Signed-off-by: Milan Tripkovic <Milan.Tripkovic@rt-rk.com>
---
arch/riscv/include/asm/string.h | 2 +
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/memcmp.S | 124 ++++++++++++++++++++++++++++++++
arch/riscv/purgatory/Makefile | 5 +-
4 files changed, 131 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/lib/memcmp.S
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 764ffe8f6479..5c5299678c66 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -18,6 +18,8 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
#define __HAVE_ARCH_MEMMOVE
extern asmlinkage void *memmove(void *, const void *, size_t);
extern asmlinkage void *__memmove(void *, const void *, size_t);
+#define __HAVE_ARCH_MEMCMP
+extern asmlinkage int memcmp(const void *, const void *, size_t);
#if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
#define __HAVE_ARCH_STRCMP
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 6f767b2a349d..b529e1be18b2 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,6 +3,7 @@ lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
lib-y += memmove.o
+lib-y += memcmp.o
ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
lib-y += strcmp.o
lib-y += strlen.o
diff --git a/arch/riscv/lib/memcmp.S b/arch/riscv/lib/memcmp.S
new file mode 100644
index 000000000000..f7d0eaa08880
--- /dev/null
+++ b/arch/riscv/lib/memcmp.S
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+/* int memcmp(const void *cs, const void *ct, size_t n) */
+SYM_FUNC_START(memcmp)
+
+	__ALTERNATIVE_CFG("nop", "j memcmp_zbb", 0, RISCV_ISA_EXT_ZBB,
+		IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
+/*
+ * Parameters (byte-wise variant, executed when the jump above is not taken)
+ *   a0 - Pointer to first memory block (cs), also return value
+ *   a1 - Pointer to second memory block (ct)
+ *   a2 - Number of bytes to compare (n), transformed to end pointer (a0 + n)
+ *
+ * Returns
+ *   a0 - 0 if equal, positive if cs > ct, negative if cs < ct
+ *
+ * Clobbers
+ *   t0, t1 (a1 and a2 are advanced/overwritten as well)
+ */
+	beqz	a2, 2f			/* n == 0: report equal */
+	add	a2, a0, a2		/* a2 = one past the last byte of cs */
+1:
+	lbu	t0, 0(a0)
+	lbu	t1, 0(a1)
+	bne	t0, t1, 3f
+	addi	a0, a0, 1
+	addi	a1, a1, 1
+	bne	a0, a2, 1b
+2:
+	li	a0, 0
+	ret
+3:
+	sub	a0, t0, t1		/* difference of the first mismatching bytes */
+	ret
+/* Assemble the Zbb variant only under the condition that emits the jump to it. */
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+memcmp_zbb:
+.option push
+.option arch,+zbb
+/*
+ * Parameters (word-at-a-time variant; byte loop if cs or ct is not SZREG-aligned)
+ *   a0 - Pointer to first memory block (cs), also return value
+ *   a1 - Pointer to second memory block (ct)
+ *   a2 - Number of bytes to compare (n), only used to form the end pointer t3
+ *
+ * Returns
+ *   a0 - 0 if equal, positive if cs > ct, negative if cs < ct
+ *
+ * Clobbers
+ *   t0, t1, t2, t3, t4 (a1 is advanced as well)
+ */
+	add	t3, a0, a2		/* t3 = cs + n, one past the last byte of cs */
+	or	t0, a0, a1
+	andi	t0, t0, (SZREG - 1)
+	bnez	t0, 5f			/* cs or ct misaligned: byte loop */
+
+	addi	t4, t3, -SZREG		/* t4 = highest cs address a full word may start at */
+	bltu	t4, a0, 7f		/* fewer than SZREG bytes: tail handling only */
+
+1:
+	REG_L	t1, 0(a0)
+	REG_L	t2, 0(a1)
+	bne	t1, t2, 2f
+	addi	a0, a0, SZREG
+	addi	a1, a1, SZREG
+	bleu	a0, t4, 1b
+
+7:					/* tail: fewer than SZREG bytes left */
+	beq	a0, t3, 4f
+	REG_L	t1, 0(a0)		/* NOTE(review): loads past cs + n; excess shifted out below */
+	REG_L	t2, 0(a1)
+
+	sub	t0, t3, a0		/* t0 = number of bytes still to compare */
+	li	t4, SZREG
+	sub	t0, t4, t0
+	slli	t0, t0, 3		/* t0 = bit count of the bytes beyond the end */
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	rev8	t1, t1			/* lowest-addressed byte becomes most significant */
+	rev8	t2, t2
+#endif
+	srl	t1, t1, t0		/* drop the bytes beyond the end */
+	srl	t2, t2, t0
+
+	bne	t1, t2, 8f
+	li	a0, 0
+	ret
+5:
+	beq	a0, t3, 4f		/* n == 0: report equal */
+6:
+	lbu	t1, 0(a0)
+	lbu	t2, 0(a1)
+	bne	t1, t2, 3f
+	addi	a0, a0, 1
+	addi	a1, a1, 1
+	bne	a0, t3, 6b
+
+4:	li	a0, 0
+	ret
+2:					/* mismatch found in a full word */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+	rev8	t1, t1			/* order bytes so an unsigned compare matches memory order */
+	rev8	t2, t2
+#endif
+8:
+	sltu	a0, t2, t1		/* a0 = 1 if cs word > ct word */
+	sltu	t0, t1, t2		/* t0 = 1 if cs word < ct word */
+	sub	a0, a0, t0		/* result is +1 or -1 */
+	ret
+
+3:
+	sub	a0, t1, t2		/* difference of the first mismatching bytes */
+	ret
+
+.option pop
+#endif /* CONFIG_RISCV_ISA_ZBB && CONFIG_TOOLCHAIN_HAS_ZBB */
+SYM_FUNC_END(memcmp)
+SYM_FUNC_ALIAS(__pi_memcmp, memcmp)
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index b0358a78f11a..456929971da7 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
+purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o memcmp.o
ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o strchr.o strrchr.o
endif
@@ -41,6 +41,9 @@ $(obj)/strchr.o: $(srctree)/arch/riscv/lib/strchr.S FORCE
$(obj)/strrchr.o: $(srctree)/arch/riscv/lib/strrchr.S FORCE
$(call if_changed_rule,as_o_S)
+$(obj)/memcmp.o: $(srctree)/arch/riscv/lib/memcmp.S FORCE
+ $(call if_changed_rule,as_o_S)
+
CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS
--
2.43.0
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v2 2/2] lib/string_kunit: extend benchmarks and unit test to memcmp()
2026-05-14 12:13 [PATCH v2 0/2] riscv: lib: add optimized memcmp() and extend KUnit tests Milan Tripkovic
2026-05-14 12:13 ` [PATCH v2 1/2] riscv: lib: add memcmp() implementation Milan Tripkovic
@ 2026-05-14 12:13 ` Milan Tripkovic
2026-05-14 16:19 ` Kees Cook
1 sibling, 1 reply; 4+ messages in thread
From: Milan Tripkovic @ 2026-05-14 12:13 UTC (permalink / raw)
To: Paul Walmsley, Palmer Dabbelt, Albert Ou
Cc: Alexandre Ghiti, Kees Cook, Andy Shevchenko, Nathan Chancellor,
Nick Desaulniers, Bill Wendling, Justin Stitt, linux-riscv,
linux-kernel, linux-hardening, llvm, Dusan Stojkovic,
Milan Tripkovic, kernel test robot
From: Milan Tripkovic <Milan.Tripkovic@rt-rk.com>
Extend the string KUnit suite with a memcmp() correctness test and a
memcmp() benchmark.
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202605140827.Qg1DZpcB-lkp@intel.com/
Signed-off-by: Milan Tripkovic <Milan.Tripkovic@rt-rk.com>
---
lib/tests/string_kunit.c | 106 +++++++++++++++++++++++++++++++++++++++
1 file changed, 106 insertions(+)
diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
index 0819ace5b027..d0bad40a719a 100644
--- a/lib/tests/string_kunit.c
+++ b/lib/tests/string_kunit.c
@@ -881,6 +881,110 @@ static void string_bench_strrchr(struct kunit *test)
STRING_BENCH_BUF(test, buf, len, strrchr, buf, '\0');
}
+/* Check memcmp() equality and result sign over small offsets, lengths and mismatch positions. */
+static void string_test_memcmp(struct kunit *test)
+{
+	const int max_offset = 16;
+	const int max_len = 32;
+	const int buf_size = max_offset + max_len + 32;
+	u8 *buf1, *buf2;
+	int i, j, len, k, res;
+
+	buf1 = kunit_kzalloc(test, buf_size, GFP_KERNEL);
+	buf2 = kunit_kzalloc(test, buf_size, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf1);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf2);
+	memset(buf1, 'A', buf_size);
+	memset(buf2, 'A', buf_size);
+
+	for (i = 0; i < max_offset; i++) {
+		for (j = 0; j < max_offset; j++) {
+			for (len = 0; len <= max_len; len++) {
+				KUNIT_EXPECT_EQ_MSG(test, memcmp(buf1 + i, buf2 + j, len), 0,
+					"Should be equal: i:%d j:%d len:%d", i, j, len);
+				for (k = 0; k < len; k++) {
+					/* 0x80 sorts above 'A' only when bytes compare as unsigned. */
+					buf2[j + k] = 0x80;
+					res = memcmp(buf1 + i, buf2 + j, len);
+					KUNIT_EXPECT_LT_MSG(test, res, 0,
+						"Should detect difference at k:%d (i:%d j:%d len:%d)",
+						k, i, j, len);
+					/* Swap the operands so a positive result is checked too. */
+					res = memcmp(buf2 + j, buf1 + i, len);
+					KUNIT_EXPECT_GT_MSG(test, res, 0,
+						"Should detect difference at k:%d (i:%d j:%d len:%d)",
+						k, i, j, len);
+					buf2[j + k] = 'A';	/* undo for the next position */
+				}
+			}
+		}
+	}
+}
+
+#if IS_ENABLED(CONFIG_STRING_KUNIT_BENCH)
+/* Log memcmp() throughput for a set of lengths with ct at several byte offsets. */
+static void string_bench_memcmp(struct kunit *test)
+{
+	char *buf1, *buf2;
+	size_t lengths[] = { 1, 7, 8, 16, 32, 64, 128, 512, 1024, 4096};
+	int offsets[] = {0, 1, 3, 7};
+	const size_t max_len = 4096 + 64;
+
+	buf1 = vmalloc(max_len);
+	buf2 = vmalloc(max_len);
+	if (!buf1 || !buf2) {
+		vfree(buf1);
+		vfree(buf2);
+		kunit_err(test, "vmalloc failed\n");
+		return;
+	}
+
+	memset(buf1, 'A', max_len);
+	memset(buf2, 'A', max_len);
+	/* Untimed calls before any measurement is taken. */
+	for (int i = 0; i < 100000; i++)
+		(void)memcmp(buf1, buf2, 4096);
+
+	for (int o = 0; o < ARRAY_SIZE(offsets); o++) {
+		int off = offsets[o];
+
+		for (int i = 0; i < ARRAY_SIZE(lengths); i++) {
+			size_t len = lengths[i];
+			char *p1 = buf1;
+			char *p2 = buf2 + off;	/* only ct is shifted; cs stays at buf1 */
+
+			u32 iterations = (len < 512) ? 100000 : 10000;
+
+			for (u32 j = 0; j < iterations; j++) {
+				(void)memcmp(p1, p2, len);
+				barrier();
+			}
+			/* elapsed is u64: div_u64() would truncate it to u32, and 0 must not divide. */
+			u64 elapsed = STRING_BENCH(iterations, memcmp, p1, p2, len);
+			u64 ns_per_call = div_u64(elapsed, iterations);
+			u64 mbps = elapsed ? div64_u64((u64)len * iterations * 1000,
+						       elapsed) : 0;
+
+			if (off == 0) {
+				kunit_info(test, "bench_memcmp_aligned: len=%-4zu: %llu MB/s (%llu ns/call)\n",
+					   len, mbps, ns_per_call);
+			} else {
+				kunit_info(test, "bench_memcmp_unaligned(off=%d): len=%-4zu: %llu MB/s (%llu ns/call)\n",
+					   off, len, mbps, ns_per_call);
+			}
+		}
+	}
+
+	vfree(buf1);
+	vfree(buf2);
+}
+#else
+static void string_bench_memcmp(struct kunit *test)
+{
+	kunit_skip(test, "not enabled");
+}
+#endif
+
static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_test_memset16),
KUNIT_CASE(string_test_memset32),
@@ -910,6 +1014,8 @@ static struct kunit_case string_test_cases[] = {
KUNIT_CASE(string_bench_strnlen),
KUNIT_CASE(string_bench_strchr),
KUNIT_CASE(string_bench_strrchr),
+ KUNIT_CASE(string_test_memcmp),
+ KUNIT_CASE_SLOW(string_bench_memcmp),
{}
};
--
2.43.0
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
^ permalink raw reply related [flat|nested] 4+ messages in thread