From: Yu Chien Peter Lin <peterlin@andestech.com>
To: <u-boot@lists.denx.de>
Cc: <rick@andestech.com>, <ycliang@andestech.com>,
<peterlin@andestech.com>, <kconsul@ventanamicro.com>,
<bmeng@tinylab.org>
Subject: [PATCH] riscv: Add Zbb support for building U-Boot
Date: Wed, 9 Aug 2023 18:49:30 +0800 [thread overview]
Message-ID: <20230809104930.7462-1-peterlin@andestech.com> (raw)
This patch adds ISA string to the -march to generate zbb instructions
for U-Boot binaries, along with optimized string functions introduced
from Linux kernel.
Signed-off-by: Yu Chien Peter Lin <peterlin@andestech.com>
---
arch/riscv/Kconfig | 92 +++++++++++++++++++++++++++++
arch/riscv/Makefile | 5 +-
arch/riscv/include/asm/string.h | 18 ++++++
arch/riscv/lib/Makefile | 3 +
arch/riscv/lib/strcmp_zbb.S | 81 +++++++++++++++++++++++++
arch/riscv/lib/strlen_zbb.S | 101 ++++++++++++++++++++++++++++++++
arch/riscv/lib/strncmp_zbb.S | 94 +++++++++++++++++++++++++++++
7 files changed, 393 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/lib/strcmp_zbb.S
create mode 100644 arch/riscv/lib/strlen_zbb.S
create mode 100644 arch/riscv/lib/strncmp_zbb.S
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 867cbcbe74..49dd714e99 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -177,6 +177,98 @@ config RISCV_ISA_D
riscv32 ABI from ilp32 to ilp32d and the riscv64 ABI from lp64 to
lp64d.
+config RISCV_ISA_ZBB
+ bool "Zbb extension support for bit manipulation instructions"
+ default n
+ help
+ Adds ZBB extension (basic bit manipulation) to the ISA subsets
+ that the toolchain is allowed to emit when building U-Boot.
+ The Zbb extension provides instructions to accelerate a number
+ of bit-specific operations (count bit population, sign extending,
+ bitrotation, etc) and enables optimized string routines.
+
+menu "Use assembly optimized implementation of string routines"
+
+config USE_ARCH_STRLEN
+ bool "Use an assembly optimized implementation of strlen"
+ default y
+ depends on RISCV_ISA_ZBB
+ help
+ Enable the generation of an optimized version of strlen using
+ Zbb extension.
+
+config SPL_USE_ARCH_STRLEN
+ bool "Use an assembly optimized implementation of strlen for SPL"
+ default y if USE_ARCH_STRLEN
+ depends on RISCV_ISA_ZBB
+ depends on SPL
+ help
+ Enable the generation of an optimized version of strlen using
+ Zbb extension.
+
+config TPL_USE_ARCH_STRLEN
+ bool "Use an assembly optimized implementation of strlen for TPL"
+ default y if USE_ARCH_STRLEN
+ depends on RISCV_ISA_ZBB
+ depends on TPL
+ help
+ Enable the generation of an optimized version of strlen using
+ Zbb extension.
+
+config USE_ARCH_STRCMP
+ bool "Use an assembly optimized implementation of strcmp"
+ default y
+ depends on RISCV_ISA_ZBB
+ help
+ Enable the generation of an optimized version of strcmp using
+ Zbb extension.
+
+config SPL_USE_ARCH_STRCMP
+ bool "Use an assembly optimized implementation of strcmp for SPL"
+ default y if USE_ARCH_STRCMP
+ depends on RISCV_ISA_ZBB
+ depends on SPL
+ help
+ Enable the generation of an optimized version of strcmp using
+ Zbb extension.
+
+config TPL_USE_ARCH_STRCMP
+ bool "Use an assembly optimized implementation of strcmp for TPL"
+ default y if USE_ARCH_STRCMP
+ depends on RISCV_ISA_ZBB
+ depends on TPL
+ help
+ Enable the generation of an optimized version of strcmp using
+ Zbb extension.
+
+config USE_ARCH_STRNCMP
+ bool "Use an assembly optimized implementation of strncmp"
+ default y
+ depends on RISCV_ISA_ZBB
+ help
+ Enable the generation of an optimized version of strncmp using
+ Zbb extension.
+
+config SPL_USE_ARCH_STRNCMP
+ bool "Use an assembly optimized implementation of strncmp for SPL"
+ default y if USE_ARCH_STRNCMP
+ depends on RISCV_ISA_ZBB
+ depends on SPL
+ help
+ Enable the generation of an optimized version of strncmp using
+ Zbb extension.
+
+config TPL_USE_ARCH_STRNCMP
+ bool "Use an assembly optimized implementation of strncmp for TPL"
+ default y if USE_ARCH_STRNCMP
+ depends on RISCV_ISA_ZBB
+ depends on TPL
+ help
+ Enable the generation of an optimized version of strncmp using
+ Zbb extension.
+
+endmenu
+
config RISCV_ISA_A
def_bool y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 4963b5109b..b3ef87078b 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -24,6 +24,9 @@ endif
ifeq ($(CONFIG_RISCV_ISA_C),y)
ARCH_C = c
endif
+ifeq ($(CONFIG_RISCV_ISA_ZBB),y)
+ ARCH_ZBB = _zbb
+endif
ifeq ($(CONFIG_CMODEL_MEDLOW),y)
CMODEL = medlow
endif
@@ -32,7 +35,7 @@ ifeq ($(CONFIG_CMODEL_MEDANY),y)
endif
-RISCV_MARCH = $(ARCH_BASE)$(ARCH_A)$(ARCH_F)$(ARCH_D)$(ARCH_C)
+RISCV_MARCH = $(ARCH_BASE)$(ARCH_A)$(ARCH_F)$(ARCH_D)$(ARCH_C)$(ARCH_ZBB)
ABI = $(ABI_BASE)$(ABI_D)
# Newer binutils versions default to ISA spec version 20191213 which moves some
diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 7dee3e4c9f..38ad85fe3b 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -40,4 +40,22 @@ extern void *memmove(void *, const void *, __kernel_size_t);
#endif
extern void *memset(void *, int, __kernel_size_t);
+#undef __HAVE_ARCH_STRLEN
+#if CONFIG_IS_ENABLED(USE_ARCH_STRLEN)
+#define __HAVE_ARCH_STRLEN
+#endif
+extern __kernel_size_t strlen(const char *);
+
+#undef __HAVE_ARCH_STRCMP
+#if CONFIG_IS_ENABLED(USE_ARCH_STRCMP)
+#define __HAVE_ARCH_STRCMP
+#endif
+extern int strcmp(const char *, const char *);
+
+#undef __HAVE_ARCH_STRNCMP
+#if CONFIG_IS_ENABLED(USE_ARCH_STRNCMP)
+#define __HAVE_ARCH_STRNCMP
+#endif
+extern int strncmp(const char *, const char *, size_t __kernel_size_t);
+
#endif /* __ASM_RISCV_STRING_H */
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 02c4d8fcc6..9a05b662fd 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -42,5 +42,8 @@ extra-$(CONFIG_EFI) += $(EFI_CRT0) $(EFI_RELOC)
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMSET) += memset.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMMOVE) += memmove.o
obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_MEMCPY) += memcpy.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRLEN) += strlen_zbb.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRCMP) += strcmp_zbb.o
+obj-$(CONFIG_$(SPL_TPL_)USE_ARCH_STRNCMP) += strncmp_zbb.o
obj-$(CONFIG_$(SPL_TPL_)SEMIHOSTING) += semihosting.o
diff --git a/arch/riscv/lib/strcmp_zbb.S b/arch/riscv/lib/strcmp_zbb.S
new file mode 100644
index 0000000000..e5a367c0e5
--- /dev/null
+++ b/arch/riscv/lib/strcmp_zbb.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strcmp.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ENTRY(__strcmp)
+WEAK(strcmp)
+.option push
+.option arch,+zbb
+ /*
+ * Returns
+ * a0 - comparison result, value like strcmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4
+ */
+
+ or t2, a0, a1
+ li t4, -1
+ and t2, t2, SZREG-1
+ bnez t2, 3f
+
+ /* Main loop for aligned string. */
+ .p2align 3
+1:
+ REG_L t0, 0(a0)
+ REG_L t1, 0(a1)
+ orc.b t3, t0
+ bne t3, t4, 2f
+ addi a0, a0, SZREG
+ addi a1, a1, SZREG
+ beq t0, t1, 1b
+
+ /*
+ * Words don't match, and no null byte in the first
+ * word. Get bytes in big-endian order and compare.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ rev8 t0, t0
+ rev8 t1, t1
+#endif
+
+ /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
+ sltu a0, t0, t1
+ neg a0, a0
+ ori a0, a0, 1
+ ret
+
+2:
+ /*
+ * Found a null byte.
+ * If words don't match, fall back to simple loop.
+ */
+ bne t0, t1, 3f
+
+ /* Otherwise, strings are equal. */
+ li a0, 0
+ ret
+
+ /* Simple loop for misaligned strings. */
+ .p2align 3
+3:
+ lbu t0, 0(a0)
+ lbu t1, 0(a1)
+ addi a0, a0, 1
+ addi a1, a1, 1
+ bne t0, t1, 4f
+ bnez t0, 3b
+
+4:
+ sub a0, t0, t1
+ ret
+.option pop
+END(__strcmp)
diff --git a/arch/riscv/lib/strlen_zbb.S b/arch/riscv/lib/strlen_zbb.S
new file mode 100644
index 0000000000..bd8fa4c0bd
--- /dev/null
+++ b/arch/riscv/lib/strlen_zbb.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strlen.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define CZ ctz
+# define SHIFT srl
+#else
+# define CZ clz
+# define SHIFT sll
+#endif
+
+ENTRY(__strlen)
+WEAK(strlen)
+.option push
+.option arch,+zbb
+ /*
+ * Returns
+ * a0 - string length
+ *
+ * Parameters
+ * a0 - String to measure
+ *
+ * Clobbers
+ * t0, t1, t2, t3
+ */
+
+ /* Number of irrelevant bytes in the first word. */
+ andi t2, a0, SZREG-1
+
+ /* Align pointer. */
+ andi t0, a0, -SZREG
+
+ li t3, SZREG
+ sub t3, t3, t2
+ slli t2, t2, 3
+
+ /* Get the first word. */
+ REG_L t1, 0(t0)
+
+ /*
+ * Shift away the partial data we loaded to remove the irrelevant bytes
+ * preceding the string with the effect of adding NUL bytes at the
+ * end of the string's first word.
+ */
+ SHIFT t1, t1, t2
+
+ /* Convert non-NUL into 0xff and NUL into 0x00. */
+ orc.b t1, t1
+
+ /* Convert non-NUL into 0x00 and NUL into 0xff. */
+ not t1, t1
+
+ /*
+ * Search for the first set bit (corresponding to a NUL byte in the
+ * original chunk).
+ */
+ CZ t1, t1
+
+ /*
+ * The first chunk is special: compare against the number
+ * of valid bytes in this chunk.
+ */
+ srli a0, t1, 3
+ bgtu t3, a0, 2f
+
+ /* Prepare for the word comparison loop. */
+ addi t2, t0, SZREG
+ li t3, -1
+
+ /*
+ * Our critical loop is 4 instructions and processes data in
+ * 4 byte or 8 byte chunks.
+ */
+ .p2align 3
+1:
+ REG_L t1, SZREG(t0)
+ addi t0, t0, SZREG
+ orc.b t1, t1
+ beq t1, t3, 1b
+
+ not t1, t1
+ CZ t1, t1
+ srli t1, t1, 3
+
+ /* Get number of processed bytes. */
+ sub t2, t0, t2
+
+ /* Add number of characters in the first word. */
+ add a0, a0, t2
+
+ /* Add number of characters in the last word. */
+ add a0, a0, t1
+2:
+ ret
+.option pop
+END(__strlen)
diff --git a/arch/riscv/lib/strncmp_zbb.S b/arch/riscv/lib/strncmp_zbb.S
new file mode 100644
index 0000000000..00e0fec3a6
--- /dev/null
+++ b/arch/riscv/lib/strncmp_zbb.S
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Taken from Linux arch/riscv/lib/strncmp.S
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ENTRY(__strncmp)
+WEAK(strncmp)
+.option push
+.option arch,+zbb
+ /*
+ * Returns
+ * a0 - comparison result, like strncmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ * a2 - number of characters to compare
+ *
+ * Clobbers
+ * t0, t1, t2, t3, t4, t5, t6
+ */
+
+ or t2, a0, a1
+ li t5, -1
+ and t2, t2, SZREG-1
+ add t4, a0, a2
+ bnez t2, 3f
+
+ /* Adjust limit for fast-path. */
+ andi t6, t4, -SZREG
+
+ /* Main loop for aligned string. */
+ .p2align 3
+1:
+ bge a0, t6, 3f
+ REG_L t0, 0(a0)
+ REG_L t1, 0(a1)
+ orc.b t3, t0
+ bne t3, t5, 2f
+ orc.b t3, t1
+ bne t3, t5, 2f
+ addi a0, a0, SZREG
+ addi a1, a1, SZREG
+ beq t0, t1, 1b
+
+ /*
+ * Words don't match, and no null byte in the first
+ * word. Get bytes in big-endian order and compare.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ rev8 t0, t0
+ rev8 t1, t1
+#endif
+
+ /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
+ sltu a0, t0, t1
+ neg a0, a0
+ ori a0, a0, 1
+ ret
+
+2:
+ /*
+ * Found a null byte.
+ * If words don't match, fall back to simple loop.
+ */
+ bne t0, t1, 3f
+
+ /* Otherwise, strings are equal. */
+ li a0, 0
+ ret
+
+ /* Simple loop for misaligned strings. */
+ .p2align 3
+3:
+ bge a0, t4, 5f
+ lbu t0, 0(a0)
+ lbu t1, 0(a1)
+ addi a0, a0, 1
+ addi a1, a1, 1
+ bne t0, t1, 4f
+ bnez t0, 3b
+
+4:
+ sub a0, t0, t1
+ ret
+
+5:
+ li a0, 0
+ ret
+.option pop
+END(__strncmp)
--
2.34.1
next reply other threads:[~2023-08-09 10:49 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-09 10:49 Yu Chien Peter Lin [this message]
2023-10-17 7:30 ` [PATCH] riscv: Add Zbb support for building U-Boot Leo Liang
2023-10-17 15:38 ` Tom Rini
2023-10-19 9:24 ` Leo Liang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230809104930.7462-1-peterlin@andestech.com \
--to=peterlin@andestech.com \
--cc=bmeng@tinylab.org \
--cc=kconsul@ventanamicro.com \
--cc=rick@andestech.com \
--cc=u-boot@lists.denx.de \
--cc=ycliang@andestech.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.