* [PATCH 2/4] RISC-V: add alternative-field for bits to not match against
2023-01-13 21:23 [PATCH 0/4] Zbb + fast-unaligned string optimization Heiko Stuebner
2023-01-13 21:23 ` [PATCH 1/4] RISC-V: use bit-values instead of numbers to identify patched cpu-features Heiko Stuebner
@ 2023-01-13 21:23 ` Heiko Stuebner
2023-01-14 17:44 ` Conor Dooley
2023-01-17 12:41 ` Andrew Jones
2023-01-13 21:23 ` [PATCH 3/4] RISC-V: add cpufeature probing for fast-unaligned access Heiko Stuebner
` (2 subsequent siblings)
4 siblings, 2 replies; 9+ messages in thread
From: Heiko Stuebner @ 2023-01-13 21:23 UTC (permalink / raw)
To: linux-riscv, palmer
Cc: christoph.muellner, conor, philipp.tomsich, ajones, heiko,
jszhang, Heiko Stuebner
From: Heiko Stuebner <heiko.stuebner@vrull.eu>
Alternatives on RISC-V do not necessarily know about each other. An
alternative is therefore always defined by its new code plus a
vendor + "errata" identifier, and this whole block then points to the
old code it may replace.
This is actually a nice property, as it reduces complexity and allows
different sources of alternatives (cpu-features, vendor-errata) to
co-exist.
When using a bitfield for cpufeatures to support combinations, there
also needs to be a way to specify which bits must _not_ be set for an
alternative to match. For example, one alternative for zbb could work
on any core supporting zbb, while a second, better variant could target
cores supporting zbb + extension-x; the zbb-only variant then needs to
exclude cores that also provide extension-x. So add an errata_not field
holding the feature bits an alternative must not match.
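For illustration, patch 4/4 of this series uses the new field exactly
this way (the CPUFEATURE_FAST_UNALIGNED bit is introduced in patch
3/4): the combined variant requires both feature bits, while the
zbb-only variant must not match cores with fast unaligned access:

	ALTERNATIVE_2("nop",
		      "j strcmp_zbb_unaligned", 0,
		      CPUFEATURE_ZBB | CPUFEATURE_FAST_UNALIGNED, 0,
		      CONFIG_RISCV_ISA_ZBB,
		      "j strcmp_zbb", 0,
		      CPUFEATURE_ZBB, CPUFEATURE_FAST_UNALIGNED,
		      CONFIG_RISCV_ISA_ZBB)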
Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
---
arch/riscv/include/asm/alternative-macros.h | 64 +++++++++++----------
arch/riscv/include/asm/alternative.h | 1 +
arch/riscv/include/asm/errata_list.h | 18 +++---
arch/riscv/kernel/cpufeature.c | 3 +-
arch/riscv/lib/strcmp.S | 2 +-
arch/riscv/lib/strlen.S | 2 +-
arch/riscv/lib/strncmp.S | 2 +-
7 files changed, 48 insertions(+), 44 deletions(-)
diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index 2c0f4c887289..b80ea0d15c67 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -6,18 +6,19 @@
#ifdef __ASSEMBLY__
-.macro ALT_ENTRY oldptr newptr vendor_id errata_id new_len
+.macro ALT_ENTRY oldptr newptr new_len vendor_id errata_id errata_not
RISCV_PTR \oldptr
RISCV_PTR \newptr
REG_ASM \vendor_id
REG_ASM \new_len
.word \errata_id
+ .word \errata_not
.endm
-.macro ALT_NEW_CONTENT vendor_id, errata_id, enable = 1, new_c : vararg
+.macro ALT_NEW_CONTENT vendor_id, errata_id, errata_not, enable = 1, new_c : vararg
.if \enable
.pushsection .alternative, "a"
- ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f
+ ALT_ENTRY 886b, 888f, 889f - 888f, \vendor_id, \errata_id, \errata_not
.popsection
.subsection 1
888 :
@@ -33,7 +34,7 @@
.endif
.endm
-.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable
+.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, errata_not, enable
886 :
.option push
.option norvc
@@ -41,13 +42,13 @@
\old_c
.option pop
887 :
- ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c
+ ALT_NEW_CONTENT \vendor_id, \errata_id, \errata_not, \enable, \new_c
.endm
-.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
- new_c_2, vendor_id_2, errata_id_2, enable_2
- ALTERNATIVE_CFG "\old_c", "\new_c_1", \vendor_id_1, \errata_id_1, \enable_1
- ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2
+.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, errata_not_1, enable_1, \
+ new_c_2, vendor_id_2, errata_id_2, errata_not_2, enable_2
+ ALTERNATIVE_CFG "\old_c", "\new_c_1", \vendor_id_1, \errata_id_1, \errata_not_1, \enable_1
+ ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \errata_not_2, \enable_2, \new_c_2
.endm
#define __ALTERNATIVE_CFG(...) ALTERNATIVE_CFG __VA_ARGS__
@@ -58,17 +59,18 @@
#include <asm/asm.h>
#include <linux/stringify.h>
-#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen) \
+#define ALT_ENTRY(oldptr, newptr, newlen, vendor_id, errata_id, errata_not) \
RISCV_PTR " " oldptr "\n" \
RISCV_PTR " " newptr "\n" \
REG_ASM " " vendor_id "\n" \
REG_ASM " " newlen "\n" \
- ".word " errata_id "\n"
+ ".word " errata_id "\n" \
+ ".word " errata_not "\n"
-#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \
+#define ALT_NEW_CONTENT(vendor_id, errata_id, errata_not, enable, new_c) \
".if " __stringify(enable) " == 1\n" \
".pushsection .alternative, \"a\"\n" \
- ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \
+ ALT_ENTRY("886b", "888f", "889f - 888f", __stringify(vendor_id), __stringify(errata_id), __stringify(errata_not)) \
".popsection\n" \
".subsection 1\n" \
"888 :\n" \
@@ -83,7 +85,7 @@
".previous\n" \
".endif\n"
-#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \
+#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, errata_not, enable) \
"886 :\n" \
".option push\n" \
".option norvc\n" \
@@ -91,22 +93,22 @@
old_c "\n" \
".option pop\n" \
"887 :\n" \
- ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c)
+ ALT_NEW_CONTENT(vendor_id, errata_id, errata_not, enable, new_c)
-#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
- new_c_2, vendor_id_2, errata_id_2, enable_2) \
- __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \
- ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2)
+#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, errata_not_1, enable_1, \
+ new_c_2, vendor_id_2, errata_id_2, errata_not_2, enable_2) \
+ __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, errata_not_1, enable_1) \
+ ALT_NEW_CONTENT(vendor_id_2, errata_id_2, errata_not_2, enable_2, new_c_2)
#endif /* __ASSEMBLY__ */
-#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
- __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k))
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, errata_not, CONFIG_k) \
+ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, errata_not, IS_ENABLED(CONFIG_k))
-#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
- new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \
- __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \
- new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2))
+#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, errata_not_1, CONFIG_k_1, \
+ new_c_2, vendor_id_2, errata_id_2, errata_not_2, CONFIG_k_2) \
+ __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, errata_not_1, IS_ENABLED(CONFIG_k_1), \
+ new_c_2, vendor_id_2, errata_id_2, errata_not_2, IS_ENABLED(CONFIG_k_2))
#else /* CONFIG_RISCV_ALTERNATIVE */
#ifdef __ASSEMBLY__
@@ -148,8 +150,8 @@
* CONFIG_k: The Kconfig of this errata. When Kconfig is disabled, the old
content will always be executed.
*/
-#define ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) \
- _ALTERNATIVE_CFG(old_content, new_content, vendor_id, errata_id, CONFIG_k)
+#define ALTERNATIVE(old_content, new_content, vendor_id, errata_id, errata_not, CONFIG_k) \
+ _ALTERNATIVE_CFG(old_content, new_content, vendor_id, errata_id, errata_not, CONFIG_k)
/*
* A vendor wants to replace an old_content, but another vendor has used
@@ -158,9 +160,9 @@
* on the following sample code and then replace ALTERNATIVE() with
* ALTERNATIVE_2() to append its customized content.
*/
-#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
- new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \
- _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
- new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2)
+#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, errata_not_1, CONFIG_k_1, \
+ new_content_2, vendor_id_2, errata_id_2, errata_not_2, CONFIG_k_2) \
+ _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, errata_not_1, CONFIG_k_1, \
+ new_content_2, vendor_id_2, errata_id_2, errata_not_2, CONFIG_k_2)
#endif
diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
index 1bd4027d34ca..d08c563ab7d8 100644
--- a/arch/riscv/include/asm/alternative.h
+++ b/arch/riscv/include/asm/alternative.h
@@ -36,6 +36,7 @@ struct alt_entry {
unsigned long vendor_id; /* cpu vendor id */
unsigned long alt_len; /* The replacement size */
unsigned int errata_id; /* The errata id */
+ unsigned int errata_not; /* Errata id not to match against */
} __packed;
struct errata_checkfunc_id {
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 40c9e9c3295b..043b79c79824 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -32,19 +32,19 @@
#define ALT_INSN_FAULT(x) \
ALTERNATIVE(__stringify(RISCV_PTR do_trap_insn_fault), \
__stringify(RISCV_PTR sifive_cip_453_insn_fault_trp), \
- SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \
+ SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, 0, \
CONFIG_ERRATA_SIFIVE_CIP_453)
#define ALT_PAGE_FAULT(x) \
ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \
__stringify(RISCV_PTR sifive_cip_453_page_fault_trp), \
- SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \
+ SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, 0, \
CONFIG_ERRATA_SIFIVE_CIP_453)
#else /* !__ASSEMBLY__ */
#define ALT_FLUSH_TLB_PAGE(x) \
asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
- ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
+ ERRATA_SIFIVE_CIP_1200, 0, CONFIG_ERRATA_SIFIVE_CIP_1200) \
: : "r" (addr) : "memory")
/*
@@ -56,9 +56,9 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
#define ALT_SVPBMT(_val, prot) \
asm(ALTERNATIVE_2("li %0, 0\t\nnop", \
"li %0, %1\t\nslli %0,%0,%3", 0, \
- CPUFEATURE_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \
+ CPUFEATURE_SVPBMT, 0, CONFIG_RISCV_ISA_SVPBMT, \
"li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
- ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
+ ERRATA_THEAD_PBMT, 0, CONFIG_ERRATA_THEAD_PBMT) \
: "=r"(_val) \
: "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \
"I"(prot##_THEAD >> ALT_THEAD_PBMT_SHIFT), \
@@ -82,7 +82,7 @@ asm volatile(ALTERNATIVE( \
"slli t3, t3, %3\n\t" \
"or %0, %0, t3\n\t" \
"2:", THEAD_VENDOR_ID, \
- ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
+ ERRATA_THEAD_PBMT, 0, CONFIG_ERRATA_THEAD_PBMT) \
: "+r"(_val) \
: "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \
"I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \
@@ -130,7 +130,7 @@ asm volatile(ALTERNATIVE_2( \
"add a0, a0, %0\n\t" \
"2:\n\t" \
"bltu a0, %2, 3b\n\t" \
- "nop", 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \
+ "nop", 0, CPUFEATURE_ZICBOM, 0, CONFIG_RISCV_ISA_ZICBOM, \
"mv a0, %1\n\t" \
"j 2f\n\t" \
"3:\n\t" \
@@ -139,7 +139,7 @@ asm volatile(ALTERNATIVE_2( \
"2:\n\t" \
"bltu a0, %2, 3b\n\t" \
THEAD_SYNC_S, THEAD_VENDOR_ID, \
- ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \
+ ERRATA_THEAD_CMO, 0, CONFIG_ERRATA_THEAD_CMO) \
: : "r"(_cachesize), \
"r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \
"r"((unsigned long)(_start) + (_size)) \
@@ -152,7 +152,7 @@ asm volatile(ALTERNATIVE_2( \
asm volatile(ALTERNATIVE( \
"csrr %0, " __stringify(CSR_SSCOUNTOVF), \
"csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \
- THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \
+ THEAD_VENDOR_ID, ERRATA_THEAD_PMU, 0, \
CONFIG_ERRATA_THEAD_PMU) \
: "=r" (__ovl) : \
: "memory")
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 8c83bd9d0e22..a65bebdadb68 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -377,7 +377,8 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
continue;
}
- if ((cpu_req_feature & alt->errata_id) == alt->errata_id) {
+ if ((cpu_req_feature & alt->errata_id) == alt->errata_id &&
+	    !(cpu_req_feature & alt->errata_not)) {
patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
riscv_alternative_fix_offsets(alt->old_ptr, alt->alt_len,
alt->old_ptr - alt->alt_ptr);
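For clarity, a minimal user-space sketch of the resulting match rule
(not kernel code; the names mirror the alt_entry fields above):

	#include <stdbool.h>

	/* An alternative applies iff all errata_id bits are present in
	 * the CPU's feature set and none of the errata_not bits are. */
	static bool alt_applies(unsigned long cpu_req_feature,
				unsigned int errata_id,
				unsigned int errata_not)
	{
		return (cpu_req_feature & errata_id) == errata_id &&
		       !(cpu_req_feature & errata_not);
	}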
diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
index 8148b6418f61..ce85bbbee4b9 100644
--- a/arch/riscv/lib/strcmp.S
+++ b/arch/riscv/lib/strcmp.S
@@ -9,7 +9,7 @@
/* int strcmp(const char *cs, const char *ct) */
SYM_FUNC_START(strcmp)
- ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+	ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, 0, CONFIG_RISCV_ISA_ZBB)
/*
* Returns
diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
index 0f9dbf93301a..8fdd53a734b4 100644
--- a/arch/riscv/lib/strlen.S
+++ b/arch/riscv/lib/strlen.S
@@ -9,7 +9,7 @@
/* int strlen(const char *s) */
SYM_FUNC_START(strlen)
- ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+ ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, 0, CONFIG_RISCV_ISA_ZBB)
/*
* Returns
diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index 7940ddab2d48..e46ad168f1e4 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -9,7 +9,7 @@
/* int strncmp(const char *cs, const char *ct, size_t count) */
SYM_FUNC_START(strncmp)
- ALTERNATIVE("nop", "j strncmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
+ ALTERNATIVE("nop", "j strncmp_zbb", 0, CPUFEATURE_ZBB, 0, CONFIG_RISCV_ISA_ZBB)
/*
* Returns
--
2.35.1
* [PATCH 4/4] RISC-V: add strcmp variant using zbb and fast-unaligned access
2023-01-13 21:23 [PATCH 0/4] Zbb + fast-unaligned string optimization Heiko Stuebner
` (2 preceding siblings ...)
2023-01-13 21:23 ` [PATCH 3/4] RISC-V: add cpufeature probing for fast-unaligned access Heiko Stuebner
@ 2023-01-13 21:23 ` Heiko Stuebner
2023-05-11 21:06 ` [PATCH 0/4] Zbb + fast-unaligned string optimization Palmer Dabbelt
4 siblings, 0 replies; 9+ messages in thread
From: Heiko Stuebner @ 2023-01-13 21:23 UTC (permalink / raw)
To: linux-riscv, palmer
Cc: christoph.muellner, conor, philipp.tomsich, ajones, heiko,
jszhang, Heiko Stuebner
From: Heiko Stuebner <heiko.stuebner@vrull.eu>
On cores whose hardware handles unaligned accesses fast, some further
optimizations are possible, so add a second strcmp variant for that
case.
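Both variants rely on the word-at-a-time trick enabled by Zbb's orc.b
instruction: it turns every non-zero byte into 0xff and leaves zero
bytes as 0x00, so a register contains a NUL byte exactly when its
orc.b result differs from all-ones. A minimal C model of this test
(a sketch only, assuming 64-bit XLEN):

	#include <stdint.h>

	/* C model of the Zbb orc.b instruction: every non-zero byte of
	 * w becomes 0xff, every zero byte stays 0x00. */
	static uint64_t orc_b(uint64_t w)
	{
		uint64_t r = 0;
		int i;

		for (i = 0; i < 64; i += 8)
			if ((w >> i) & 0xff)
				r |= (uint64_t)0xff << i;
		return r;
	}

	/* w holds a NUL byte iff orc_b(w) != ~0ULL; this is the
	 * "orc.b t3, t0; bne t3, a4, ..." test below, with a4 = -1. */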
Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
---
arch/riscv/lib/strcmp.S | 170 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 169 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
index ce85bbbee4b9..53f41d032aae 100644
--- a/arch/riscv/lib/strcmp.S
+++ b/arch/riscv/lib/strcmp.S
@@ -9,7 +9,13 @@
/* int strcmp(const char *cs, const char *ct) */
SYM_FUNC_START(strcmp)
- ALTERNATIVE_2("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, 0, CONFIG_RISCV_ISA_ZBB)
+ ALTERNATIVE_2("nop",
+ "j strcmp_zbb_unaligned", 0,
+ CPUFEATURE_ZBB | CPUFEATURE_FAST_UNALIGNED, 0,
+ CONFIG_RISCV_ISA_ZBB,
+ "j strcmp_zbb", 0,
+ CPUFEATURE_ZBB, CPUFEATURE_FAST_UNALIGNED,
+ CONFIG_RISCV_ISA_ZBB)
/*
* Returns
@@ -116,6 +122,168 @@ strcmp_zbb:
sub a0, t0, t1
ret
+strcmp_zbb_unaligned:
+
+ /*
+ * Returns
+ * a0 - comparison result, value like strcmp
+ *
+ * Parameters
+ * a0 - string1
+ * a1 - string2
+ *
+ * Clobbers
+ * a3, a4, a5, a6, a7, t0, t1, t2, t3, t4, t5
+ */
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# error big endian is untested!
+# define CZ clz
+# define SHIFT srl
+# define SHIFT2 sll
+#else
+# define CZ ctz
+# define SHIFT sll
+# define SHIFT2 srl
+#endif
+
+	/* a3 = delta from a0 to a1. */
+ sub a3, a1, a0
+ li a4, -1
+ andi a7, a3, SZREG-1
+ andi a5, a0, SZREG-1
+ bnez a7, 7f
+ bnez a5, 6f
+
+ .p2align 4
+1:
+ REG_L t0, 0(a0)
+ add a7, a0, a3
+ addi a0, a0, SZREG
+ REG_L t1, 0(a7)
+
+2:
+ orc.b t3, t0
+ bne t3, a4, 4f
+ beq t0, t1, 1b
+
+	/* Words don't match, and no NUL byte in the first word.
+	   Get bytes in big-endian order and compare as words. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ rev8 t0, t0
+ rev8 t1, t1
+#endif
+ /* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
+ sltu a0, t0, t1
+ neg a0, a0
+ ori a0, a0, 1
+ ret
+
+3:
+ orc.b t3, t0
+4:
+ /* Words don't match or NUL byte in at least one word.
+ t3 holds orc.b value of t0. */
+ xor a7, t0, t1
+ orc.b a7, a7
+
+ orn a7, a7, t3
+ CZ t5, a7
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ rev8 t0, t0
+ rev8 t1, t1
+#endif
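+	/* t5 (from CZ above) is the bit offset of the first differing
+	   or NUL byte: shift that byte to the top and back down so
+	   only it remains in t0/t1 for the final subtraction. */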
+ sll t0, t0, t5
+ sll t1, t1, t5
+ srl t0, t0, SZREG*8-8
+ srl t1, t1, SZREG*8-8
+
+5:
+ sub a0, t0, t1
+ ret
+
+ .p2align 4
+6:
+	/* Sources are mutually aligned, but are not currently at an
+	   alignment boundary. Round down the addresses and then mask
+	   off the bytes that precede the start point. */
+ andi a0, a0, -SZREG
+ add a7, a0, a3
+ REG_L t0, 0(a0)
+ addi a0, a0, SZREG
+ REG_L t1, 0(a7)
+ /* Get number of bits to mask. */
+ sll t5, a1, 3
+ /* Bits to mask are now 0, others are 1. */
+ SHIFT a7, a4, t5
+ /* Or with inverted value -> masked bits become 1. */
+ orn t0, t0, a7
+ orn t1, t1, a7
+ j 2b
+
+7:
+ /* Skip slow loop if a0 is aligned. */
+ beqz a5, 9f
+8:
+ /* Align a0 to 8 bytes. */
+ lbu t0, 0(a0)
+ lbu t1, 0(a1)
+ beqz t0, 5b
+ bne t0, t1, 5b
+ addi a0, a0, 1
+ addi a1, a1, 1
+ andi a5, a0, SZREG-1
+ bnez a5, 8b
+
+9:
+ /* a0 is aligned. Align a1 down and check for NUL there.
+ * If there is no NUL, we may read the next word from a1.
+ * If there is a NUL, we must not read a complete word from a1
+ * because we might cross a page boundary. */
+ /* Get number of bits to mask (upper bits are ignored by shifts). */
+ sll t5, a1, 3
+ /* a6 := align_down (a1) */
+ andi a6, a1, -SZREG
+ REG_L t2, 0(a6)
+ addi a6, a6, SZREG
+
+ /* Bits to mask are now 0, others are 1. */
+ SHIFT a7, a4, t5
+ /* Or with inverted value -> masked bits become 1. */
+ orn t4, t2, a7
+ /* Check for NUL in next aligned word. */
+ orc.b t4, t4
+ bne t4, a4, 11f
+
+ .p2align 4
+10:
+ /* Read the (aligned) t0 and the unaligned t1. */
+ REG_L t0, 0(a0)
+ addi a0, a0, SZREG
+ REG_L t1, 0(a1)
+ addi a1, a1, SZREG
+ orc.b t3, t0
+ bne t3, a4, 4b
+ bne t0, t1, 4b
+
+ /* Read the next aligned-down word. */
+ REG_L t2, 0(a6)
+ addi a6, a6, SZREG
+ orc.b t4, t2
+ beq t4, a4, 10b
+
+11:
+ /* a0 points to unread word (only first bytes relevant).
+ * t2 holds next aligned-down word with NUL.
+ * Compare the first bytes of t0 with the last bytes of t2. */
+ REG_L t0, 0(a0)
+ /* Shift NUL bytes into t2 to become t1. */
+ SHIFT2 t1, t2, t5
+ bne t0, t1, 3b
+ li a0, 0
+ ret
+
.option pop
#endif
SYM_FUNC_END(strcmp)
--
2.35.1