Linux-RISC-V Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Milan Tripkovic <milant2002@gmail.com>
To: pjw@kernel.org, palmer@dabbelt.com, aou@eecs.berkeley.edu
Cc: alex@ghiti.fr, linux-riscv@lists.infradead.org,
	linux-kernel@vger.kernel.org, Dusan.Stojkovic@rt-rk.com,
	Milan Tripkovic <Milan.Tripkovic@rt-rk.com>
Subject: [PATCH] riscv: lib: add strrchr() zbb implementation
Date: Thu, 14 May 2026 18:09:10 +0200	[thread overview]
Message-ID: <20260514160910.1796966-1-milant2002@gmail.com> (raw)

From: Milan Tripkovic <Milan.Tripkovic@rt-rk.com>

Add a Zbb assembly implementation of strrchr() for RISC-V.
The implementation uses Zbb bit-manipulation instructions such as
orc.b, ctz, and clz to process multiple bytes per iteration, which
significantly improves performance on longer strings compared to
the generic byte-by-byte implementation.

For testing, I used the existing string_bench_strrchr benchmark, but
changed the searched character from '\0' to 'a' to obtain more
realistic results, because the assembly code handles '\0' on a
separate path.

Benchmark results (QEMU TCG, rv64):

  Len   | ZBB    | WoZBB  | %ZBB/WoZBB
  ------|--------|--------|------------
  1 B   | 20.0   | 22.9   | -12.7%
  7 B   | 87.5   | 110.1  | -20.5%
  8 B   | 166.8  | 130.3  | +28.0%
  16 B  | 329.5  | 189.1  | +74.2%
  31 B  | 366.9  | 195.7  | +87.5%
  64 B  | 870.3  | 231.5  | +275.9%
  127 B | 1007.0 | 278.9  | +261.1%
  512 B | 1751.9 | 305.5  | +473.5%
  1024 B| 1841.9 | 294.7  | +525.0%
  2048 B| 1955.4 | 310.4  | +530.0%
  4096 B| 2034.6 | 312.5  | +551.1%

Signed-off-by: Milan Tripkovic <Milan.Tripkovic@rt-rk.com>
---
 arch/riscv/lib/strrchr.S | 129 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 128 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/lib/strrchr.S b/arch/riscv/lib/strrchr.S
index ac58b20ca21d..46ca232a6b43 100644
--- a/arch/riscv/lib/strrchr.S
+++ b/arch/riscv/lib/strrchr.S
@@ -6,13 +6,17 @@
 
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
 
 /* char *strrchr(const char *s, int c) */
 SYM_FUNC_START(strrchr)
+	__ALTERNATIVE_CFG("nop", "j strrchr_zbb", 0, RISCV_ISA_EXT_ZBB,
+		IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
 	/*
 	 * Parameters
 	 *	a0 - The string to be searched
-	 *	a1 - The character to seaerch for
+	 *	a1 - The character to search for
 	 *
 	 * Returns
 	 *	a0 - Address of last occurrence of 'c' or 0
@@ -31,6 +35,129 @@ SYM_FUNC_START(strrchr)
 	addi	t1, t1, 1
 	bnez	t0, 1b
 	ret
+
+/*
+ * Variant of strrchr using the ZBB extension if available
+ */
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+strrchr_zbb:
+.option push
+.option arch,+zbb
+	/*
+	 * Parameters
+	 *	a0 - The string to be searched
+	 *	a1 - The character to search for
+	 *
+	 * Returns
+	 *	a0 - Address of last occurrence of 'c' or 0
+	 *
+	 * Clobbers
+	 *	a1, t0, t1, t2, t3, t4, t5, t6
+	 */
+	andi	a1, a1, 0xff		/* only the low byte of c is compared */
+	mv	t1, a0			/* t1 = scan pointer */
+	li	a0, 0			/* a0 = last match so far (none yet) */
+	beqz	a1, .Lfind_end_zbb	/* c == '\0': the answer is the terminator */
+
+	slli	t5, a1, 8		/* t5 = c replicated into every byte */
+	or	t5, t5, a1
+	slli	t2, t5, 16
+	or	t5, t5, t2
+#if __riscv_xlen == 64
+	slli	t2, t5, 32
+	or	t5, t5, t2
+#endif
+
+	andi	t2, t1, SZREG-1		/* word loop needs an SZREG-aligned pointer */
+	bnez	t2, .Lmisaligned_start
+
+.Lmain_loop_pre:
+	li	t4, -1			/* all-ones reference for the loop test */
+
+	.balign 16
+.Lmain_loop:
+	REG_L	t0, 0(t1)
+	addi	t1, t1, SZREG		/* t1 now points past the loaded word */
+	xor	t6, t0, t5		/* bytes equal to c become 0x00 */
+	orc.b	t2, t0			/* t2: 0xff per non-NUL byte, 0x00 per NUL */
+	orc.b	t6, t6			/* t6: 0xff per byte != c, 0x00 per match */
+	and	t3, t2, t6
+	beq	t3, t4, .Lmain_loop	/* all ones: no NUL and no match in word */
+
+	not	t2, t2			/* t2: 0xff at every NUL byte */
+	not	t6, t6			/* t6: 0xff at every matching byte */
+
+	beqz	t2, .Lonly_matches	/* no NUL here: record match, keep going */
+
+	addi	t1, t1, -SZREG		/* t1 = base of the word holding the NUL */
+	ctz	t3, t2			/* bit offset of first NUL (assumes little-endian) */
+	sll	t4, t4, t3		/* t4 = mask of the NUL byte and all above */
+	andn	t6, t6, t4		/* keep only matches located before the NUL */
+	beqz	t6, .Ldone		/* none: return the match found earlier */
+
+	clz	t3, t6			/* zero bits above the last matching byte */
+	srli	t3, t3, 3		/* ... converted to a byte count n */
+	xori	t3, t3, SZREG-1		/* (SZREG-1) - n = index of last match */
+	add	a0, t1, t3
+.Ldone:
+	ret
+
+.Lonly_matches:
+	clz	t3, t6			/* zero bits above the last matching byte */
+	srli	t3, t3, 3		/* ... converted to a byte count n */
+	not	t3, t3			/* ~n = -(n + 1); t1 is already past the word */
+	add	a0, t1, t3
+	j	.Lmain_loop		/* t4 is still -1 on this path */
+
+.Lfind_end_zbb:
+	andi	t2, t1, SZREG-1		/* word loop needs an SZREG-aligned pointer */
+	bnez	t2, .Lmisaligned_end_start
+
+.Lfind_end_pre:
+	li	t4, -1			/* all-ones reference for the loop test */
+
+	.balign 16
+.Lfind_end_loop:
+	REG_L	t0, 0(t1)
+	addi	t1, t1, SZREG
+	orc.b	t2, t0			/* 0xff per non-NUL byte, 0x00 per NUL */
+	beq	t2, t4, .Lfind_end_loop	/* all ones: no NUL in this word */
+
+	addi	t1, t1, -SZREG		/* t1 = base of the word holding the NUL */
+	not	t2, t2			/* 0xff at every NUL byte */
+	ctz	t3, t2			/* bit offset of first NUL (assumes little-endian) */
+	srli	t3, t3, 3		/* ... as a byte index */
+	add	a0, t1, t3		/* return the address of the terminator */
+	ret
+
+.Lfound_zero:
+	mv	a0, t1			/* terminator found during byte-wise scan */
+	ret
+.Lmisaligned_start:
+	ori	t2, t1, SZREG-1
+	addi	t2, t2, 1		/* t2 = next SZREG-aligned address */
+.Lalign_loop:
+	lbu	t0, 0(t1)
+	beqz	t0, .Ldone		/* end of string: a0 holds the last match */
+	bne	t0, a1, 1f
+	mv	a0, t1			/* remember this match */
+1:
+	addi	t1, t1, 1
+	bne	t1, t2, .Lalign_loop
+	j	.Lmain_loop_pre		/* aligned now: continue word-at-a-time */
+
+.Lmisaligned_end_start:
+	ori	t2, t1, SZREG-1
+	addi	t2, t2, 1		/* t2 = next SZREG-aligned address */
+.Lfind_end_align:
+	lbu	t0, 0(t1)
+	beqz	t0, .Lfound_zero
+	addi	t1, t1, 1
+	bne	t1, t2, .Lfind_end_align
+	j	.Lfind_end_pre		/* aligned now: continue word-at-a-time */
+
+.option pop
+#endif
 SYM_FUNC_END(strrchr)
 
 SYM_FUNC_ALIAS_WEAK(__pi_strrchr, strrchr)
-- 
2.43.0


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

                 reply	other threads:[~2026-05-14 16:10 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260514160910.1796966-1-milant2002@gmail.com \
    --to=milant2002@gmail.com \
    --cc=Dusan.Stojkovic@rt-rk.com \
    --cc=Milan.Tripkovic@rt-rk.com \
    --cc=alex@ghiti.fr \
    --cc=aou@eecs.berkeley.edu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=palmer@dabbelt.com \
    --cc=pjw@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox