Linux-RISC-V Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] riscv: lib: add strrchr() zbb implementation
@ 2026-05-14 16:09 Milan Tripkovic
  0 siblings, 0 replies; only message in thread
From: Milan Tripkovic @ 2026-05-14 16:09 UTC (permalink / raw)
  To: pjw, palmer, aou
  Cc: alex, linux-riscv, linux-kernel, Dusan.Stojkovic, Milan Tripkovic

From: Milan Tripkovic <Milan.Tripkovic@rt-rk.com>

Add a Zbb assembly implementation of strrchr() for RISC-V.
The implementation uses ZBB bit-manipulation instructions such as
orc.b, ctz, and clz to process multiple bytes per iteration and
significantly improve performance for longer strings compared to
the generic byte-by-byte implementation.

For the test case, I used the existing string_bench_strrchr benchmark,
but I changed the input character from '\0' to 'a' to obtain more
realistic results, because I added a check for '\0' in the assembly code.

Benchmark results (QEMU TCG, rv64):

  Len   | ZBB    | WoZBB  | %ZBB/WoZBB
  ------|--------|--------|------------
  1 B   | 20.0   | 22.9   | -12.7%
  7 B   | 87.5   | 110.1  | -20.5%
  8 B   | 166.8  | 130.3  | +28.0%
  16 B  | 329.5  | 189.1  | +74.2%
  31 B  | 366.9  | 195.7  | +87.5%
  64 B  | 870.3  | 231.5  | +275.9%
  127 B | 1007.0 | 278.9  | +261.1%
  512 B | 1751.9 | 305.5  | +473.5%
  1024 B| 1841.9 | 294.7  | +525.0%
  2048 B| 1955.4 | 310.4  | +530.0%
  4096 B| 2034.6 | 312.5  | +551.1%

Signed-off-by: Milan Tripkovic <Milan.Tripkovic@rt-rk.com>
---
 arch/riscv/lib/strrchr.S | 129 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 128 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/lib/strrchr.S b/arch/riscv/lib/strrchr.S
index ac58b20ca21d..46ca232a6b43 100644
--- a/arch/riscv/lib/strrchr.S
+++ b/arch/riscv/lib/strrchr.S
@@ -6,13 +6,17 @@
 
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
 
 /* char *strrchr(const char *s, int c) */
 SYM_FUNC_START(strrchr)
+	__ALTERNATIVE_CFG("nop", "j strrchr_zbb", 0, RISCV_ISA_EXT_ZBB,
+		IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
 	/*
 	 * Parameters
 	 *	a0 - The string to be searched
-	 *	a1 - The character to seaerch for
+	 *	a1 - The character to search for
 	 *
 	 * Returns
 	 *	a0 - Address of last occurrence of 'c' or 0
@@ -31,6 +35,129 @@ SYM_FUNC_START(strrchr)
 	addi	t1, t1, 1
 	bnez	t0, 1b
 	ret
+
+/*
+ * Variant of strrchr using the ZBB extension if available
+ */
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+strrchr_zbb:
+.option push
+.option arch,+zbb
+	/*
+	 * Parameters
+	 *	a0 - The string to be searched
+	 *	a1 - The character to search for
+	 *
+	 * Returns
+	 *	a0 - Address of last occurrence of 'c' or 0
+	 *
+	 * Clobbers
+	 *	a1, t0, t1, t2, t3, t4, t5, t6
+	 */
+	andi	a1, a1, 0xff		/* compare against the low byte of c */
+	mv	t1, a0			/* t1 = scan pointer */
+	li	a0, 0			/* a0 = last match so far (none) */
+	beqz	a1, .Lfind_end_zbb	/* c == 0: result is the terminator */
+
+	slli	t5, a1, 8		/* t5 = c replicated into every byte */
+	or	t5, t5, a1
+	slli	t2, t5, 16
+	or	t5, t5, t2
+#if __riscv_xlen == 64
+	slli	t2, t5, 32
+	or	t5, t5, t2
+#endif
+
+	andi	t2, t1, SZREG-1
+	bnez	t2, .Lmisaligned_start	/* byte-scan up to a word boundary */
+
+.Lmain_loop_pre:
+	li	t4, -1			/* all-ones = "no NUL, no match" */
+
+	.balign 16
+.Lmain_loop:
+	REG_L	t0, 0(t1)
+	addi	t1, t1, SZREG		/* t1 now points past the loaded word */
+	xor	t6, t0, t5		/* bytes equal to c become 0x00 */
+	orc.b	t2, t0			/* t2: 0x00 in NUL bytes, else 0xff */
+	orc.b	t6, t6			/* t6: 0x00 in matching bytes, else 0xff */
+	and	t3, t2, t6
+	beq	t3, t4, .Lmain_loop	/* neither NUL nor c in this word */
+
+	not	t2, t2			/* t2: 0xff in NUL bytes */
+	not	t6, t6			/* t6: 0xff in matching bytes */
+
+	beqz	t2, .Lonly_matches	/* matches but no terminator yet */
+
+	addi	t1, t1, -SZREG		/* t1 = address of the final word */
+	ctz	t3, t2			/* first NUL (assumes little-endian) */
+	sll	t4, t4, t3		/* mask: the NUL and all bytes after it */
+	andn	t6, t6, t4		/* keep only matches before the NUL */
+	beqz	t6, .Ldone		/* none: return the earlier result */
+
+	clz	t3, t6
+	srli	t3, t3, 3		/* bytes above the last match */
+	xori	t3, t3, SZREG-1		/* SZREG-1 - t3 = index of last match */
+	add	a0, t1, t3
+.Ldone:
+	ret
+
+.Lonly_matches:
+	clz	t3, t6
+	srli	t3, t3, 3		/* bytes above the last match */
+	not	t3, t3			/* -(t3 + 1): offset from word end */
+	add	a0, t1, t3		/* record last match, keep scanning */
+	j	.Lmain_loop
+
+.Lfind_end_zbb:
+	andi	t2, t1, SZREG-1
+	bnez	t2, .Lmisaligned_end_start
+
+.Lfind_end_pre:
+	li	t4, -1
+
+	.balign 16
+.Lfind_end_loop:
+	REG_L	t0, 0(t1)
+	addi	t1, t1, SZREG
+	orc.b	t2, t0
+	beq	t2, t4, .Lfind_end_loop	/* no NUL byte in this word */
+
+	addi	t1, t1, -SZREG
+	not	t2, t2
+	ctz	t3, t2
+	srli	t3, t3, 3		/* byte index of the first NUL */
+	add	a0, t1, t3
+	ret
+
+.Lfound_zero:
+	mv	a0, t1
+	ret
+.Lmisaligned_start:
+	ori	t2, t1, SZREG-1
+	addi	t2, t2, 1		/* t2 = next SZREG-aligned address */
+.Lalign_loop:
+	lbu	t0, 0(t1)
+	beqz	t0, .Ldone		/* terminator: return last match in a0 */
+	bne	t0, a1, 1f
+	mv	a0, t1			/* remember this match */
+1:
+	addi	t1, t1, 1
+	bne	t1, t2, .Lalign_loop
+	j	.Lmain_loop_pre
+
+.Lmisaligned_end_start:
+	ori	t2, t1, SZREG-1
+	addi	t2, t2, 1		/* t2 = next SZREG-aligned address */
+.Lfind_end_align:
+	lbu	t0, 0(t1)
+	beqz	t0, .Lfound_zero
+	addi	t1, t1, 1
+	bne	t1, t2, .Lfind_end_align
+	j	.Lfind_end_pre
+
+.option pop
+#endif
 SYM_FUNC_END(strrchr)
 
 SYM_FUNC_ALIAS_WEAK(__pi_strrchr, strrchr)
-- 
2.43.0


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-05-14 16:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-14 16:09 [PATCH] riscv: lib: add strrchr() zbb implementation Milan Tripkovic

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox