From: Bo Gan <ganboing@gmail.com>
To: opensbi@lists.infradead.org, wangruikang@iscas.ac.cn,
dramforever@live.com, andrew.jones@oss.qualcomm.com
Cc: cleger@rivosinc.com, pjw@kernel.org, asrinivasan@oss.tenstorrent.com
Subject: [PATCH v2 4/4] lib: sbi: Rework misaligned vector load/store
Date: Mon, 8 Jun 2026 23:00:24 -0700 [thread overview]
Message-ID: <20260609060024.706-5-ganboing@gmail.com> (raw)
In-Reply-To: <20260609060024.706-1-ganboing@gmail.com>
Fix the following issues with misaligned vector load/store:
a. Stack overflow: the mask[VLEN_MAX / 8] variable consumes 8K stack
space, given VLEN_MAX=65536, overflowing the default-sized stack.
There's no need to fetch the whole mask in one go; instead, fetch it
on demand. Use a 128-byte local buffer to hold a sliding window of
the mask. For RVV loads, this is allowed -- from the spec:
"The destination vector register group for a masked vector
instruction cannot overlap the source mask register (v0),
unless the destination vector register is being written with
a mask value (e.g., compares) or the scalar result of a reduction"
Therefore, we don't need to worry about the mask getting overwritten
while the load is being emulated.
b. Maintain the value of vstart upon abort (uptrap) to avoid duplicate
work. After fault resolution, the instruction can restart from the
faulting vstart. For Fault-Only-First loads, reset vstart to 0, as
was done previously, to conform to the spec.
c. Explicitly set VS dirty in VSSTATUS with SET_VS_DIRTY() if faulting
from V=1, and if any vector register, including vstart/vl/vtype, gets
changed in the handler. This may add one unnecessary operation to set
VS dirty in M/SSTATUS (not VSSTATUS) when the HW has already done so,
but for code simplicity, do it anyway. The overhead should be
negligible.
Signed-off-by: Bo Gan <ganboing@gmail.com>
---
include/sbi/sbi_vector.h | 6 +++
lib/sbi/sbi_trap_v_ldst.c | 103 +++++++++++++++++++++++++-------------
2 files changed, 74 insertions(+), 35 deletions(-)
diff --git a/include/sbi/sbi_vector.h b/include/sbi/sbi_vector.h
index f00184f0..c14f3174 100644
--- a/include/sbi/sbi_vector.h
+++ b/include/sbi/sbi_vector.h
@@ -20,6 +20,12 @@ struct sbi_vector_context {
uint8_t vregs[];
};
+#define SET_VS_DIRTY(regs) do { /* mark vector state (VS) dirty */ \
+ if (sbi_regs_from_virt(regs)) /* V=1: also update VSSTATUS */ \
+ csr_set(CSR_VSSTATUS, MSTATUS_VS); \
+ (regs)->mstatus |= MSTATUS_VS; /* parenthesized: safe for any pointer expr */ \
+} while (0)
+
#ifdef OPENSBI_CC_SUPPORT_VECTOR
void sbi_vector_save(struct sbi_vector_context *dst);
void sbi_vector_restore(const struct sbi_vector_context *src);
diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
index 02f7d6cc..0f29dcf9 100644
--- a/lib/sbi/sbi_trap_v_ldst.c
+++ b/lib/sbi/sbi_trap_v_ldst.c
@@ -16,11 +16,11 @@
#include <sbi/sbi_trap_ldst.h>
#include <sbi/sbi_trap.h>
#include <sbi/sbi_unpriv.h>
-#include <sbi/sbi_trap.h>
+#include <sbi/sbi_vector.h>
#ifdef OPENSBI_CC_SUPPORT_VECTOR
-#define VLEN_MAX 65536
+#define MASK_BUFFLEN 1024
static inline void set_vreg(ulong vlenb, ulong which,
ulong pos, ulong size, const uint8_t *bytes)
@@ -168,7 +168,7 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
ulong vl = csr_read(CSR_VL);
ulong vtype = csr_read(CSR_VTYPE);
ulong vlenb = csr_read(CSR_VLENB);
- ulong vstart = csr_read(CSR_VSTART);
+ ulong vstart = csr_read(CSR_VSTART), orig_vstart = vstart;
ulong base = GET_RS1(insn, regs);
ulong stride = GET_RS2(insn, regs);
ulong vd = GET_VD(insn);
@@ -178,8 +178,9 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
ulong vlmul = GET_VLMUL(vtype);
bool illegal = GET_MEW(insn);
bool masked = IS_MASKED(insn);
- uint8_t mask[VLEN_MAX / 8];
+ uint8_t mask[MASK_BUFFLEN / 8];
uint8_t bytes[8 * sizeof(uint64_t)];
+ ulong mask_len = MASK_BUFFLEN < vlenb * 8 ? MASK_BUFFLEN : vlenb * 8;
ulong len = GET_LEN(view);
ulong nf = GET_NF(insn);
ulong vemul = GET_VEMUL(vlmul, view, vsew);
@@ -200,7 +201,7 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
stride = nf * len;
}
- if (illegal || vlenb > VLEN_MAX / 8) {
+ if (illegal) {
struct sbi_trap_info trap = {
uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
uptrap.tval = insn,
@@ -208,12 +209,16 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
return sbi_trap_redirect(regs, &trap);
}
- if (masked)
- get_vreg(vlenb, 0, 0, vlenb, mask);
-
do {
- if (masked && (~mask[vstart / 8] & BIT(vstart % 8)))
- continue;
+ if (masked) {
+ if (vstart == orig_vstart || vstart % mask_len == 0)
+ /* Fetch a mask_len chunk of mask */
+ get_vreg(vlenb, 0, vstart / mask_len * mask_len,
+ mask_len, mask);
+
+ if (~mask[vstart % mask_len / 8] & BIT(vstart % 8))
+ continue;
+ }
/* compute element address */
ulong addr = base + vstart * stride;
@@ -232,15 +237,21 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
sbi_load_loop(bytes + seg * len,
addr + seg * len, len, &uptrap);
- if (uptrap.cause) {
- if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
- vl = vstart;
- break;
- }
- vsetvl(vl, vtype);
- sbi_misaligned_v_tinst_fixup(&uptrap);
- return sbi_trap_redirect(regs, &uptrap);
+ if (!uptrap.cause)
+ continue;
+
+ if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
+ vl = vstart;
+ goto done;
}
+
+ vsetvl(vl, vtype);
+ csr_write(CSR_VSTART, vstart);
+ /* Don't forget to set dirty if vstart has changed */
+ if (vstart != orig_vstart)
+ SET_VS_DIRTY(regs);
+ sbi_misaligned_v_tinst_fixup(&uptrap);
+ return sbi_trap_redirect(regs, &uptrap);
}
/* write load data to regfile */
@@ -249,8 +260,15 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
len, &bytes[seg * len]);
} while (++vstart < vl);
+done:
/* restore clobbered vl/vtype */
- vsetvl(vl, vtype);
+ vsetvl(vl, vtype); // VSTART resets to 0
+
+ /*
+ * At least 1 element is processed, or vl is changed above in
+ * the FAULT_ONLY_FIRST_LOAD path, thus set dirty.
+ */
+ SET_VS_DIRTY(regs);
/* Return a >0 value for the caller to advance mepc */
return 1;
@@ -263,7 +281,7 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
ulong vl = csr_read(CSR_VL);
ulong vtype = csr_read(CSR_VTYPE);
ulong vlenb = csr_read(CSR_VLENB);
- ulong vstart = csr_read(CSR_VSTART);
+ ulong vstart = csr_read(CSR_VSTART), orig_vstart = vstart;
ulong base = GET_RS1(insn, regs);
ulong stride = GET_RS2(insn, regs);
ulong vd = GET_VD(insn);
@@ -273,8 +291,9 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
ulong vlmul = GET_VLMUL(vtype);
bool illegal = GET_MEW(insn);
bool masked = IS_MASKED(insn);
- uint8_t mask[VLEN_MAX / 8];
+ uint8_t mask[MASK_BUFFLEN / 8];
uint8_t bytes[8 * sizeof(uint64_t)];
+ ulong mask_len = MASK_BUFFLEN < vlenb * 8 ? MASK_BUFFLEN : vlenb * 8;
ulong len = GET_LEN(view);
ulong nf = GET_NF(insn);
ulong vemul = GET_VEMUL(vlmul, view, vsew);
@@ -295,7 +314,7 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
stride = nf * len;
}
- if (illegal || vlenb > VLEN_MAX / 8) {
+ if (illegal) {
struct sbi_trap_info trap = {
uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
uptrap.tval = insn,
@@ -303,12 +322,16 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
return sbi_trap_redirect(regs, &trap);
}
- if (masked)
- get_vreg(vlenb, 0, 0, vlenb, mask);
-
do {
- if (masked && (~mask[vstart / 8] & BIT(vstart % 8)))
- continue;
+ if (masked) {
+ if (vstart == orig_vstart || vstart % mask_len == 0)
+ /* Fetch a mask_len chunk of mask */
+ get_vreg(vlenb, 0, vstart / mask_len * mask_len,
+ mask_len, mask);
+
+ if (~mask[vstart % mask_len / 8] & BIT(vstart % 8))
+ continue;
+ }
/* compute element address */
ulong addr = base + vstart * stride;
@@ -325,23 +348,33 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
get_vreg(vlenb, vd + seg * emul, vstart * len,
len, &bytes[seg * len]);
- csr_write(CSR_VSTART, vstart);
-
/* write store data to memory */
for (ulong seg = 0; seg < nf; seg++) {
sbi_store_loop(bytes + seg * len,
addr + seg * len, len, &uptrap);
- if (uptrap.cause) {
- vsetvl(vl, vtype);
- sbi_misaligned_v_tinst_fixup(&uptrap);
- return sbi_trap_redirect(regs, &uptrap);
- }
+ if (!uptrap.cause)
+ continue;
+
+ vsetvl(vl, vtype);
+ csr_write(CSR_VSTART, vstart);
+ /* Don't forget to set dirty if vstart has changed */
+ if (vstart != orig_vstart)
+ SET_VS_DIRTY(regs);
+ sbi_misaligned_v_tinst_fixup(&uptrap);
+ return sbi_trap_redirect(regs, &uptrap);
}
} while (++vstart < vl);
/* restore clobbered vl/vtype */
- vsetvl(vl, vtype);
+ vsetvl(vl, vtype); // VSTART resets to 0
+
+ /*
+ * No need to set dirty for memory store, but as VSTART resets to
+ * 0 above, need to set dirty if it's originally not 0.
+ */
+ if (orig_vstart != 0)
+ SET_VS_DIRTY(regs);
/* Return a >0 value for the caller to advance mepc */
return 1;
--
2.34.1
--
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi
next prev parent reply other threads:[~2026-06-09 6:02 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-09 6:00 [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Bo Gan
2026-06-09 6:00 ` [PATCH v2 1/4] lib: sbi: cosmetic changes to reduce indentation Bo Gan
2026-06-09 6:00 ` [PATCH v2 2/4] lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup Bo Gan
2026-06-09 6:00 ` [PATCH v2 3/4] lib: sbi: Add variable-length unprivilege access functions Bo Gan
2026-06-09 6:00 ` Bo Gan [this message]
2026-06-09 22:02 ` [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Anirudh Srinivasan
2026-06-09 23:54 ` Bo Gan
2026-06-09 23:59 ` Anirudh Srinivasan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260609060024.706-5-ganboing@gmail.com \
--to=ganboing@gmail.com \
--cc=andrew.jones@oss.qualcomm.com \
--cc=asrinivasan@oss.tenstorrent.com \
--cc=cleger@rivosinc.com \
--cc=dramforever@live.com \
--cc=opensbi@lists.infradead.org \
--cc=pjw@kernel.org \
--cc=wangruikang@iscas.ac.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox