All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yu Chien Peter Lin <peterlin@andestech.com>
To: <u-boot@lists.denx.de>
Cc: <rick@andestech.com>, <ycliang@andestech.com>,
	<peterlin@andestech.com>, <kconsul@ventanamicro.com>,
	<bmeng@tinylab.org>
Subject: [PATCH] riscv: Add Zbb support for building U-Boot
Date: Wed, 9 Aug 2023 18:49:30 +0800	[thread overview]
Message-ID: <20230809104930.7462-1-peterlin@andestech.com> (raw)

This patch adds ISA string to the -march to generate zbb instructions
for U-Boot binaries, along with optimized string functions introduced
from Linux kernel.

Signed-off-by: Yu Chien Peter Lin <peterlin@andestech.com>
---
 arch/riscv/Kconfig              |  92 +++++++++++++++++++++++++++++
 arch/riscv/Makefile             |   5 +-
 arch/riscv/include/asm/string.h |  18 ++++++
 arch/riscv/lib/Makefile         |   3 +
 arch/riscv/lib/strcmp_zbb.S     |  81 +++++++++++++++++++++++++
 arch/riscv/lib/strlen_zbb.S     | 101 ++++++++++++++++++++++++++++++++
 arch/riscv/lib/strncmp_zbb.S    |  94 +++++++++++++++++++++++++++++
 7 files changed, 393 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/lib/strcmp_zbb.S
 create mode 100644 arch/riscv/lib/strlen_zbb.S
 create mode 100644 arch/riscv/lib/strncmp_zbb.S

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 867cbcbe74..49dd714e99 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -177,6 +177,98 @@ config RISCV_ISA_D
 	  riscv32 ABI from ilp32 to ilp32d and the riscv64 ABI from lp64 to
 	  lp64d.
 
+config RISCV_ISA_ZBB
+	bool "Zbb extension support for bit manipulation instructions"
+	default n
+	help
+	  Adds ZBB extension (basic bit manipulation) to the ISA subsets
+	  that the toolchain is allowed to emit when building U-Boot.
+	  The Zbb extension provides instructions to accelerate a number
+	  of bit-specific operations (count bit population, sign extending,
+	  bitrotation, etc) and enables optimized string routines.
+
+menu "Use assembly optimized implementation of string routines"
+
+config USE_ARCH_STRLEN
+	bool "Use an assembly optimized implementation of strlen"
+	default y
+	depends on RISCV_ISA_ZBB
+	help
+	  Enable the generation of an optimized version of strlen using
+	  Zbb extension.
+
+config SPL_USE_ARCH_STRLEN
+	bool "Use an assembly optimized implementation of strlen for SPL"
+	default y if USE_ARCH_STRLEN
+	depends on RISCV_ISA_ZBB
+	depends on SPL
+	help
+	  Enable the generation of an optimized version of strlen using
+	  Zbb extension.
+
+config TPL_USE_ARCH_STRLEN
+	bool "Use an assembly optimized implementation of strlen for TPL"
+	default y if USE_ARCH_STRLEN
+	depends on RISCV_ISA_ZBB
+	depends on TPL
+	help
+	  Enable the generation of an optimized version of strlen using
+	  Zbb extension.
+
+config USE_ARCH_STRCMP
+	bool "Use an assembly optimized implementation of strcmp"
+	default y
+	depends on RISCV_ISA_ZBB
+	help
+	  Enable the generation of an optimized version of strcmp using
+	  Zbb extension.
+
+config SPL_USE_ARCH_STRCMP
+	bool "Use an assembly optimized implementation of strcmp for SPL"
+	default y if USE_ARCH_STRCMP
+	depends on RISCV_ISA_ZBB
+	depends on SPL
+	help
+	  Enable the generation of an optimized version of strcmp using
+	  Zbb extension.
+
+config TPL_USE_ARCH_STRCMP
+	bool "Use an assembly optimized implementation of strcmp for TPL"
+	default y if USE_ARCH_STRCMP
+	depends on RISCV_ISA_ZBB
+	depends on TPL
+	help
+	  Enable the generation of an optimized version of strcmp using
+	  Zbb extension.
+
+config USE_ARCH_STRNCMP
+	bool "Use an assembly optimized implementation of strncmp"
+	default y
+	depends on RISCV_ISA_ZBB
+	help
+	  Enable the generation of an optimized version of strncmp using
+	  Zbb extension.
+
+config SPL_USE_ARCH_STRNCMP
+	bool "Use an assembly optimized implementation of strncmp for SPL"
+	default y if USE_ARCH_STRNCMP
+	depends on RISCV_ISA_ZBB
+	depends on SPL
+	help
+	  Enable the generation of an optimized version of strncmp using
+	  Zbb extension.
+
+config TPL_USE_ARCH_STRNCMP
+	bool "Use an assembly optimized implementation of strncmp for TPL"
+	default y if USE_ARCH_STRNCMP
+	depends on RISCV_ISA_ZBB
+	depends on TPL
+	help
+	  Enable the generation of an optimized version of strncmp using
+	  Zbb extension.
+
+endmenu
+
 config RISCV_ISA_A
 	def_bool y
 
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 4963b5109b..b3ef87078b 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -24,6 +24,9 @@ endif
 ifeq ($(CONFIG_RISCV_ISA_C),y)
 	ARCH_C = c
 endif
+ifeq ($(CONFIG_RISCV_ISA_ZBB),y)
+	ARCH_ZBB = _zbb
+endif
 ifeq ($(CONFIG_CMODEL_MEDLOW),y)
 	CMODEL = medlow
 endif
@@ -32,7 +35,7 @@ ifeq ($(CONFIG_CMODEL_MEDANY),y)
 endif
 
 
-RISCV_MARCH = $(ARCH_BASE)$(ARCH_A)$(ARCH_F)$(ARCH_D)$(ARCH_C)
+RISCV_MARCH = $(ARCH_BASE)$(ARCH_A)$(ARCH_F)$(ARCH_D)$(ARCH_C)$(ARCH_ZBB)
 ABI = $(ABI_BASE)$(ABI_D)
 
 # Newer binutils versions default to ISA spec version 20191213 which moves some
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 7dee3e4c9f..38ad85fe3b 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -40,4 +40,22 @@ extern void *memmove(void *, const void *, __kernel_size_t);
 #endif
 extern void *memset(void *, int, __kernel_size_t);
 
+#undef __HAVE_ARCH_STRLEN
+#if CONFIG_IS_ENABLED(USE_ARCH_STRLEN)
+#define __HAVE_ARCH_STRLEN
+#endif
+extern __kernel_size_t strlen(const char *);
+
+#undef __HAVE_ARCH_STRCMP
+#if CONFIG_IS_ENABLED(USE_ARCH_STRCMP)
+#define __HAVE_ARCH_STRCMP
+#endif
+extern int strcmp(const char *, const char *);
+
+#undef __HAVE_ARCH_STRNCMP
+#if CONFIG_IS_ENABLED(USE_ARCH_STRNCMP)
+#define __HAVE_ARCH_STRNCMP
+#endif
+extern int strncmp(const char *, const char *, size_t __kernel_size_t);
+
 #endif /* __ASM_RISCV_STRING_H */
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 02c4d8fcc6..9a05b662fd 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -42,5 +42,8 @@ extra-$(CONFIG_EFI) += $(EFI_CRT0) $(EFI_RELOC)
 obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMSET) += memset.o
 obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMMOVE) += memmove.o
 obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMCPY) += memcpy.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRLEN) += strlen_zbb.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRCMP) += strcmp_zbb.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRNCMP) += strncmp_zbb.o
 
 obj-$(CONFIG_$(SPL_TPL_)SEMIHOSTING) += semihosting.o
diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
new file mode 100644
index 0000000000..e5a367c0e5
--- /dev/null
+++ b/arch/riscv/lib/strcmp_zbb.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strcmp.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ENTRY(__strcmp)
+WEAK(strcmp)
+.option push
+.option arch,+zbb
+	/*
+	 * Returns
+	 *   a0 - comparison result, value like strcmp
+	 *
+	 * Parameters
+	 *   a0 - string1
+	 *   a1 - string2
+	 *
+	 * Clobbers
+	 *   t0, t1, t2, t3, t4
+	 */
+
+	or	t2, a0, a1
+	li	t4, -1
+	and	t2, t2, SZREG-1
+	bnez	t2, 3f
+
+	/* Main loop for aligned string.  */
+	.p2align 3
+1:
+	REG_L	t0, 0(a0)
+	REG_L	t1, 0(a1)
+	orc.b	t3, t0
+	bne	t3, t4, 2f
+	addi	a0, a0, SZREG
+	addi	a1, a1, SZREG
+	beq	t0, t1, 1b
+
+	/*
+	 * Words don't match, and no null byte in the first
+	 * word. Get bytes in big-endian order and compare.
+	 */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	rev8	t0, t0
+	rev8	t1, t1
+#endif
+
+	/* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
+	sltu	a0, t0, t1
+	neg	a0, a0
+	ori	a0, a0, 1
+	ret
+
+2:
+	/*
+	 * Found a null byte.
+	 * If words don't match, fall back to simple loop.
+	 */
+	bne	t0, t1, 3f
+
+	/* Otherwise, strings are equal. */
+	li	a0, 0
+	ret
+
+	/* Simple loop for misaligned strings. */
+	.p2align 3
+3:
+	lbu	t0, 0(a0)
+	lbu	t1, 0(a1)
+	addi	a0, a0, 1
+	addi	a1, a1, 1
+	bne	t0, t1, 4f
+	bnez	t0, 3b
+
+4:
+	sub	a0, t0, t1
+	ret
+.option pop
+END(__strcmp)
diff --git a/arch/riscv/lib/strlen_zbb.S b/arch/riscv/lib/strlen_zbb.S
new file mode 100644
index 0000000000..bd8fa4c0bd
--- /dev/null
+++ b/arch/riscv/lib/strlen_zbb.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strlen.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define CZ	ctz
+# define SHIFT	srl
+#else
+# define CZ	clz
+# define SHIFT	sll
+#endif
+
+ENTRY(__strlen)
+WEAK(strlen)
+.option push
+.option arch,+zbb
+	/*
+	 * Returns
+	 *   a0 - string length
+	 *
+	 * Parameters
+	 *   a0 - String to measure
+	 *
+	 * Clobbers
+	 *   t0, t1, t2, t3
+	 */
+
+	/* Number of irrelevant bytes in the first word. */
+	andi	t2, a0, SZREG-1
+
+	/* Align pointer. */
+	andi	t0, a0, -SZREG
+
+	li	t3, SZREG
+	sub	t3, t3, t2
+	slli	t2, t2, 3
+
+	/* Get the first word.  */
+	REG_L	t1, 0(t0)
+
+	/*
+	 * Shift away the partial data we loaded to remove the irrelevant bytes
+	 * preceding the string with the effect of adding NUL bytes at the
+	 * end of the string's first word.
+	 */
+	SHIFT	t1, t1, t2
+
+	/* Convert non-NUL into 0xff and NUL into 0x00. */
+	orc.b	t1, t1
+
+	/* Convert non-NUL into 0x00 and NUL into 0xff. */
+	not	t1, t1
+
+	/*
+	 * Search for the first set bit (corresponding to a NUL byte in the
+	 * original chunk).
+	 */
+	CZ	t1, t1
+
+	/*
+	 * The first chunk is special: compare against the number
+	 * of valid bytes in this chunk.
+	 */
+	srli	a0, t1, 3
+	bgtu	t3, a0, 2f
+
+	/* Prepare for the word comparison loop. */
+	addi	t2, t0, SZREG
+	li	t3, -1
+
+	/*
+	 * Our critical loop is 4 instructions and processes data in
+	 * 4 byte or 8 byte chunks.
+	 */
+	.p2align 3
+1:
+	REG_L	t1, SZREG(t0)
+	addi	t0, t0, SZREG
+	orc.b	t1, t1
+	beq	t1, t3, 1b
+
+	not	t1, t1
+	CZ	t1, t1
+	srli	t1, t1, 3
+
+	/* Get number of processed bytes. */
+	sub	t2, t0, t2
+
+	/* Add number of characters in the first word.  */
+	add	a0, a0, t2
+
+	/* Add number of characters in the last word.  */
+	add	a0, a0, t1
+2:
+	ret
+.option pop
+END(__strlen)
diff --git a/arch/riscv/lib/strncmp_zbb.S b/arch/riscv/lib/strncmp_zbb.S
new file mode 100644
index 0000000000..00e0fec3a6
--- /dev/null
+++ b/arch/riscv/lib/strncmp_zbb.S
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strncmp.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ENTRY(__strncmp)
+WEAK(strncmp)
+.option push
+.option arch,+zbb
+	/*
+	 * Returns
+	 *   a0 - comparison result, like strncmp
+	 *
+	 * Parameters
+	 *   a0 - string1
+	 *   a1 - string2
+	 *   a2 - number of characters to compare
+	 *
+	 * Clobbers
+	 *   t0, t1, t2, t3, t4, t5, t6
+	 */
+
+	or	t2, a0, a1
+	li	t5, -1
+	and	t2, t2, SZREG-1
+	add	t4, a0, a2
+	bnez	t2, 3f
+
+	/* Adjust limit for fast-path.  */
+	andi	t6, t4, -SZREG
+
+	/* Main loop for aligned string.  */
+	.p2align 3
+1:
+	bge	a0, t6, 3f
+	REG_L	t0, 0(a0)
+	REG_L	t1, 0(a1)
+	orc.b	t3, t0
+	bne	t3, t5, 2f
+	orc.b	t3, t1
+	bne	t3, t5, 2f
+	addi	a0, a0, SZREG
+	addi	a1, a1, SZREG
+	beq	t0, t1, 1b
+
+	/*
+	 * Words don't match, and no null byte in the first
+	 * word. Get bytes in big-endian order and compare.
+	 */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	rev8	t0, t0
+	rev8	t1, t1
+#endif
+
+	/* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence.  */
+	sltu	a0, t0, t1
+	neg	a0, a0
+	ori	a0, a0, 1
+	ret
+
+2:
+	/*
+	 * Found a null byte.
+	 * If words don't match, fall back to simple loop.
+	 */
+	bne	t0, t1, 3f
+
+	/* Otherwise, strings are equal.  */
+	li	a0, 0
+	ret
+
+	/* Simple loop for misaligned strings.  */
+	.p2align 3
+3:
+	bge	a0, t4, 5f
+	lbu	t0, 0(a0)
+	lbu	t1, 0(a1)
+	addi	a0, a0, 1
+	addi	a1, a1, 1
+	bne	t0, t1, 4f
+	bnez	t0, 3b
+
+4:
+	sub	a0, t0, t1
+	ret
+
+5:
+	li	a0, 0
+	ret
+.option pop
+END(__strncmp)
-- 
2.34.1


             reply	other threads:[~2023-08-09 10:49 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-09 10:49 Yu Chien Peter Lin [this message]
2023-10-17  7:30 ` [PATCH] riscv: Add Zbb support for building U-Boot Leo Liang
2023-10-17 15:38 ` Tom Rini
2023-10-19  9:24   ` Leo Liang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230809104930.7462-1-peterlin@andestech.com \
    --to=peterlin@andestech.com \
    --cc=bmeng@tinylab.org \
    --cc=kconsul@ventanamicro.com \
    --cc=rick@andestech.com \
    --cc=u-boot@lists.denx.de \
    --cc=ycliang@andestech.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.