From: Andrew Jones <ajones@ventanamicro.com>
To: kvm-riscv@lists.infradead.org
Subject: [PATCH 9/9] RISC-V: Use Zicboz in memset when available
Date: Thu, 27 Oct 2022 15:02:47 +0200 [thread overview]
Message-ID: <20221027130247.31634-10-ajones@ventanamicro.com> (raw)
In-Reply-To: <20221027130247.31634-1-ajones@ventanamicro.com>
RISC-V has an optimized memset() which writes byte by byte up to
the first sizeof(long)-aligned address, then uses Duff's device up to
the last sizeof(long)-aligned address, and finally writes byte by byte
to the end. When memset() is used to zero memory and the Zicboz
extension is available, we can extend that scheme: use the optimized
memset up to the first Zicboz-block-size-aligned address, then use the
Zicboz zero instruction (cbo.zero) for each block up to the last
block-size-aligned address, and finally use the optimized memset to
the end.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/lib/memset.S | 81 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+)
diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S
index 74e4c7feec00..786b85b5e9cc 100644
--- a/arch/riscv/lib/memset.S
+++ b/arch/riscv/lib/memset.S
@@ -5,6 +5,12 @@
#include <linux/linkage.h>
#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/insn-def.h>
+#include <asm/hwcap.h>
+
+#define ALT_ZICBOZ(old, new) ALTERNATIVE(old, new, 0, RISCV_ISA_EXT_ZICBOZ, \
+ CONFIG_RISCV_ISA_ZICBOZ)
/* void *memset(void *, int, size_t) */
ENTRY(__memset)
@@ -15,6 +21,58 @@ WEAK(memset)
sltiu a3, a2, 16
bnez a3, .Lfinish
+#ifdef CONFIG_RISCV_ISA_ZICBOZ
+ ALT_ZICBOZ("j .Ldo_memset", "nop")
+ /*
+ * t1 will be the Zicboz block size.
+ * Zero means we're not using Zicboz, and we don't when a1 != 0
+ */
+ li t1, 0
+ bnez a1, .Ldo_memset
+ la a3, riscv_cboz_block_size
+ lw t1, 0(a3)
+
+ /*
+ * Round to nearest Zicboz block-aligned address
+ * greater than or equal to the start address.
+ */
+ addi a3, t1, -1
+ not t2, a3 /* t2 is Zicboz block size mask */
+ add a3, t0, a3
+ and t3, a3, t2 /* t3 is Zicboz block aligned start */
+
+ /* Did we go too far or not have at least one block? */
+ add a3, a0, a2
+ and a3, a3, t2
+ bgtu a3, t3, .Ldo_zero
+ li t1, 0
+ j .Ldo_memset
+
+.Ldo_zero:
+ /* Use Duff for initial bytes if there are any */
+ bne t3, t0, .Ldo_memset
+
+.Ldo_zero2:
+ /* Calculate end address */
+ and a3, a2, t2
+ add a3, t0, a3
+ sub a4, a3, t0
+
+.Lzero_loop:
+ CBO_ZERO(t0)
+ add t0, t0, t1
+ bltu t0, a3, .Lzero_loop
+ li t1, 0 /* We're done with Zicboz */
+
+ sub a2, a2, a4 /* Update count */
+ sltiu a3, a2, 16
+ bnez a3, .Lfinish
+
+ /* t0 is Zicboz block size aligned, so it must be SZREG aligned */
+ j .Ldo_duff3
+#endif
+
+.Ldo_memset:
/*
* Round to nearest XLEN-aligned address
* greater than or equal to the start address.
@@ -33,6 +91,18 @@ WEAK(memset)
.Ldo_duff:
/* Duff's device with 32 XLEN stores per iteration */
+
+#ifdef CONFIG_RISCV_ISA_ZICBOZ
+ ALT_ZICBOZ("j .Ldo_duff2", "nop")
+ beqz t1, .Ldo_duff2
+ /* a3, the "end" address, is the Zicboz block aligned start (t3). a1 is 0 */
+ move a3, t3
+ sub a4, a3, t0 /* a4 is SZREG aligned count */
+ move t4, a4 /* Save count for later, see below. */
+ j .Ldo_duff4
+#endif
+
+.Ldo_duff2:
/* Broadcast value into all bytes */
andi a1, a1, 0xff
slli a3, a1, 8
@@ -44,10 +114,12 @@ WEAK(memset)
or a1, a3, a1
#endif
+.Ldo_duff3:
/* Calculate end address */
andi a4, a2, ~(SZREG-1)
add a3, t0, a4
+.Ldo_duff4:
andi a4, a4, 31*SZREG /* Calculate remainder */
beqz a4, .Lduff_loop /* Shortcut if no remainder */
neg a4, a4
@@ -100,6 +172,15 @@ WEAK(memset)
addi t0, t0, 32*SZREG
bltu t0, a3, .Lduff_loop
+
+#ifdef CONFIG_RISCV_ISA_ZICBOZ
+ ALT_ZICBOZ("j .Lcount_update", "nop")
+ beqz t1, .Lcount_update
+ sub a2, a2, t4 /* Difference was saved above */
+ j .Ldo_zero2
+#endif
+
+.Lcount_update:
andi a2, a2, SZREG-1 /* Update count */
.Lfinish:
--
2.37.3
WARNING: multiple messages have this Message-ID (diff)
From: Andrew Jones <ajones@ventanamicro.com>
To: linux-riscv@lists.infradead.org, kvm-riscv@lists.infradead.org
Cc: Paul Walmsley <paul.walmsley@sifive.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>,
Anup Patel <apatel@ventanamicro.com>,
Heiko Stuebner <heiko@sntech.de>,
Conor Dooley <conor.dooley@microchip.com>,
Atish Patra <atishp@rivosinc.com>,
Jisheng Zhang <jszhang@kernel.org>
Subject: [PATCH 9/9] RISC-V: Use Zicboz in memset when available
Date: Thu, 27 Oct 2022 15:02:47 +0200 [thread overview]
Message-ID: <20221027130247.31634-10-ajones@ventanamicro.com> (raw)
In-Reply-To: <20221027130247.31634-1-ajones@ventanamicro.com>
RISC-V has an optimized memset() which writes byte by byte up to
the first sizeof(long)-aligned address, then uses Duff's device up to
the last sizeof(long)-aligned address, and finally writes byte by byte
to the end. When memset() is used to zero memory and the Zicboz
extension is available, we can extend that scheme: use the optimized
memset up to the first Zicboz-block-size-aligned address, then use the
Zicboz zero instruction (cbo.zero) for each block up to the last
block-size-aligned address, and finally use the optimized memset to
the end.
Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
arch/riscv/lib/memset.S | 81 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+)
diff --git a/arch/riscv/lib/memset.S b/arch/riscv/lib/memset.S
index 74e4c7feec00..786b85b5e9cc 100644
--- a/arch/riscv/lib/memset.S
+++ b/arch/riscv/lib/memset.S
@@ -5,6 +5,12 @@
#include <linux/linkage.h>
#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/insn-def.h>
+#include <asm/hwcap.h>
+
+#define ALT_ZICBOZ(old, new) ALTERNATIVE(old, new, 0, RISCV_ISA_EXT_ZICBOZ, \
+ CONFIG_RISCV_ISA_ZICBOZ)
/* void *memset(void *, int, size_t) */
ENTRY(__memset)
@@ -15,6 +21,58 @@ WEAK(memset)
sltiu a3, a2, 16
bnez a3, .Lfinish
+#ifdef CONFIG_RISCV_ISA_ZICBOZ
+ ALT_ZICBOZ("j .Ldo_memset", "nop")
+ /*
+ * t1 will be the Zicboz block size.
+ * Zero means we're not using Zicboz, and we don't when a1 != 0
+ */
+ li t1, 0
+ bnez a1, .Ldo_memset
+ la a3, riscv_cboz_block_size
+ lw t1, 0(a3)
+
+ /*
+ * Round to nearest Zicboz block-aligned address
+ * greater than or equal to the start address.
+ */
+ addi a3, t1, -1
+ not t2, a3 /* t2 is Zicboz block size mask */
+ add a3, t0, a3
+ and t3, a3, t2 /* t3 is Zicboz block aligned start */
+
+ /* Did we go too far or not have at least one block? */
+ add a3, a0, a2
+ and a3, a3, t2
+ bgtu a3, t3, .Ldo_zero
+ li t1, 0
+ j .Ldo_memset
+
+.Ldo_zero:
+ /* Use Duff for initial bytes if there are any */
+ bne t3, t0, .Ldo_memset
+
+.Ldo_zero2:
+ /* Calculate end address */
+ and a3, a2, t2
+ add a3, t0, a3
+ sub a4, a3, t0
+
+.Lzero_loop:
+ CBO_ZERO(t0)
+ add t0, t0, t1
+ bltu t0, a3, .Lzero_loop
+ li t1, 0 /* We're done with Zicboz */
+
+ sub a2, a2, a4 /* Update count */
+ sltiu a3, a2, 16
+ bnez a3, .Lfinish
+
+ /* t0 is Zicboz block size aligned, so it must be SZREG aligned */
+ j .Ldo_duff3
+#endif
+
+.Ldo_memset:
/*
* Round to nearest XLEN-aligned address
* greater than or equal to the start address.
@@ -33,6 +91,18 @@ WEAK(memset)
.Ldo_duff:
/* Duff's device with 32 XLEN stores per iteration */
+
+#ifdef CONFIG_RISCV_ISA_ZICBOZ
+ ALT_ZICBOZ("j .Ldo_duff2", "nop")
+ beqz t1, .Ldo_duff2
+ /* a3, the "end" address, is the Zicboz block aligned start (t3). a1 is 0 */
+ move a3, t3
+ sub a4, a3, t0 /* a4 is SZREG aligned count */
+ move t4, a4 /* Save count for later, see below. */
+ j .Ldo_duff4
+#endif
+
+.Ldo_duff2:
/* Broadcast value into all bytes */
andi a1, a1, 0xff
slli a3, a1, 8
@@ -44,10 +114,12 @@ WEAK(memset)
or a1, a3, a1
#endif
+.Ldo_duff3:
/* Calculate end address */
andi a4, a2, ~(SZREG-1)
add a3, t0, a4
+.Ldo_duff4:
andi a4, a4, 31*SZREG /* Calculate remainder */
beqz a4, .Lduff_loop /* Shortcut if no remainder */
neg a4, a4
@@ -100,6 +172,15 @@ WEAK(memset)
addi t0, t0, 32*SZREG
bltu t0, a3, .Lduff_loop
+
+#ifdef CONFIG_RISCV_ISA_ZICBOZ
+ ALT_ZICBOZ("j .Lcount_update", "nop")
+ beqz t1, .Lcount_update
+ sub a2, a2, t4 /* Difference was saved above */
+ j .Ldo_zero2
+#endif
+
+.Lcount_update:
andi a2, a2, SZREG-1 /* Update count */
.Lfinish:
--
2.37.3
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
next prev parent reply other threads:[~2022-10-27 13:02 UTC|newest]
Thread overview: 80+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-27 13:02 [PATCH 0/9] RISC-V: Apply Zicboz to clear_page and memset Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-27 13:02 ` [PATCH 1/9] RISC-V: Factor out body of riscv_init_cbom_blocksize loop Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-27 14:58 ` Heiko Stübner
2022-10-27 14:58 ` Heiko Stübner
2022-10-30 20:31 ` Conor Dooley
2022-10-30 20:31 ` Conor Dooley
2022-10-31 8:11 ` Andrew Jones
2022-10-31 8:11 ` Andrew Jones
2022-10-27 13:02 ` [PATCH 2/9] RISC-V: Add Zicboz detection and block size parsing Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-27 15:03 ` Heiko Stübner
2022-10-27 15:03 ` Heiko Stübner
2022-10-27 15:42 ` Andrew Jones
2022-10-27 15:42 ` Andrew Jones
2022-10-30 20:47 ` Conor Dooley
2022-10-30 20:47 ` Conor Dooley
2022-10-31 8:12 ` Andrew Jones
2022-10-31 8:12 ` Andrew Jones
2022-11-13 22:24 ` Conor Dooley
2022-11-13 22:24 ` Conor Dooley
2022-11-14 8:29 ` Andrew Jones
2022-11-14 8:29 ` Andrew Jones
2022-10-27 13:02 ` [PATCH 3/9] RISC-V: insn-def: Define cbo.zero Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-27 15:37 ` Heiko Stübner
2022-10-27 15:37 ` Heiko Stübner
2022-10-30 21:08 ` Conor Dooley
2022-10-30 21:08 ` Conor Dooley
2022-10-31 8:18 ` Andrew Jones
2022-10-31 8:18 ` Andrew Jones
2022-10-27 13:02 ` [PATCH 4/9] RISC-V: Use Zicboz in clear_page when available Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-27 13:02 ` [PATCH 5/9] RISC-V: KVM: Provide UAPI for Zicboz block size Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-30 21:23 ` Conor Dooley
2022-10-30 21:23 ` Conor Dooley
2022-11-27 5:37 ` Anup Patel
2022-11-27 5:37 ` Anup Patel
2022-10-27 13:02 ` [PATCH 6/9] RISC-V: KVM: Expose Zicboz to the guest Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-30 21:23 ` Conor Dooley
2022-10-30 21:23 ` Conor Dooley
2022-11-27 5:38 ` Anup Patel
2022-11-27 5:38 ` Anup Patel
2022-10-27 13:02 ` [PATCH 7/9] RISC-V: lib: Improve memset assembler formatting Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-30 21:27 ` Conor Dooley
2022-10-30 21:27 ` Conor Dooley
2022-10-27 13:02 ` [PATCH 8/9] RISC-V: lib: Use named labels in memset Andrew Jones
2022-10-27 13:02 ` Andrew Jones
2022-10-30 22:15 ` Conor Dooley
2022-10-30 22:15 ` Conor Dooley
2022-10-31 8:24 ` Andrew Jones
2022-10-31 8:24 ` Andrew Jones
2022-10-27 13:02 ` Andrew Jones [this message]
2022-10-27 13:02 ` [PATCH 9/9] RISC-V: Use Zicboz in memset when available Andrew Jones
2022-10-30 22:35 ` Conor Dooley
2022-10-30 22:35 ` Conor Dooley
2022-10-31 8:30 ` Andrew Jones
2022-10-31 8:30 ` Andrew Jones
2022-11-03 2:43 ` Palmer Dabbelt
2022-11-03 2:43 ` Palmer Dabbelt
2022-11-03 10:21 ` Andrew Jones
2022-11-03 10:21 ` Andrew Jones
2022-10-29 9:59 ` [PATCH 0/9] RISC-V: Apply Zicboz to clear_page and memset Andrew Jones
2022-10-29 9:59 ` Andrew Jones
2022-10-30 20:23 ` Conor Dooley
2022-10-30 20:23 ` Conor Dooley
2022-10-31 8:39 ` Andrew Jones
2022-10-31 8:39 ` Andrew Jones
2022-11-01 10:37 ` Andrew Jones
2022-11-01 10:37 ` Andrew Jones
2022-11-01 10:53 ` Andrew Jones
2022-11-01 10:53 ` Andrew Jones
2022-12-20 12:55 ` Conor Dooley
2022-12-20 12:55 ` Conor Dooley
2022-12-26 18:56 ` Andrew Jones
2022-12-26 18:56 ` Andrew Jones
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221027130247.31634-10-ajones@ventanamicro.com \
--to=ajones@ventanamicro.com \
--cc=kvm-riscv@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.