OpenSBI Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/4] Fixes for vector misaligned load/store handlers
@ 2026-06-09  6:00 Bo Gan
  2026-06-09  6:00 ` [PATCH v2 1/4] lib: sbi: cosmetic changes to reduce indentation Bo Gan
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Bo Gan @ 2026-06-09  6:00 UTC (permalink / raw)
  To: opensbi, wangruikang, dramforever, andrew.jones; +Cc: cleger, pjw, asrinivasan

Re-visit the vector misaligned load/store handlers and fix:

a. Avoid stack overflow by using a small sliding mask[] buffer,
   thus optimizes stack usage *IMPORTANT* (correctness). There's no-
   longer a need to have a pre-defined vlen maximum, and worry about
   whether the stack can hold the mask[] variable.

b. Maintain the value of vstart when redirecting uptrap. (optmization)
   Avoids doing duplicate work when the instruction gets restarted.

c. Explicitly set VS dirty in (V)SSTATUS. (correctness), VS in
   VSSTATUS must be set dirty if coming from V=1.

d. Zero out tinst in uptrap if not guest-page fault (correctness).

This is a follow up patch to [1] and should be applied on top.
[1] https://lore.kernel.org/opensbi/CAEev2e_Vg1mMP4mOKanFX_EGeUzpczRcWW++vBBuN1CfyM0BJw@mail.gmail.com/T/#t
---
v2: Fix the wrong PATCH 4/4 generated in v1.

---
Bo Gan (4):
  lib: sbi: cosmetic changes to reduce indentation
  lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup
  lib: sbi: Add variable-length unprivilege access functions
  lib: sbi: Rework misaligned vector load/store

 include/sbi/sbi_trap_ldst.h |   3 -
 include/sbi/sbi_unpriv.h    |  16 +++
 include/sbi/sbi_vector.h    |   6 ++
 lib/sbi/sbi_trap_ldst.c     |  66 +++++++-----
 lib/sbi/sbi_trap_v_ldst.c   | 201 ++++++++++++++++++++++--------------
 lib/sbi/sbi_unpriv.c        |  88 ++++++++++++++--
 6 files changed, 270 insertions(+), 110 deletions(-)

-- 
2.34.1


-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 1/4] lib: sbi: cosmetic changes to reduce indentation
  2026-06-09  6:00 [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Bo Gan
@ 2026-06-09  6:00 ` Bo Gan
  2026-06-09  6:00 ` [PATCH v2 2/4] lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup Bo Gan
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Bo Gan @ 2026-06-09  6:00 UTC (permalink / raw)
  To: opensbi, wangruikang, dramforever, andrew.jones; +Cc: cleger, pjw, asrinivasan

In preparation for subsequent patches

Signed-off-by: Bo Gan <ganboing@gmail.com>
---
 lib/sbi/sbi_trap_v_ldst.c | 117 +++++++++++++++++++-------------------
 1 file changed, 60 insertions(+), 57 deletions(-)

diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
index 57f12b83..7d2e1409 100644
--- a/lib/sbi/sbi_trap_v_ldst.c
+++ b/lib/sbi/sbi_trap_v_ldst.c
@@ -11,6 +11,7 @@
 
 #include <sbi/riscv_asm.h>
 #include <sbi/riscv_encoding.h>
+#include <sbi/sbi_bitops.h>
 #include <sbi/sbi_error.h>
 #include <sbi/sbi_trap_ldst.h>
 #include <sbi/sbi_trap.h>
@@ -189,44 +190,45 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 		get_vreg(vlenb, 0, 0, vlenb, mask);
 
 	do {
-		if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
-			/* compute element address */
-			ulong addr = base + vstart * stride;
+		if (masked && (~mask[vstart / 8] & BIT(vstart % 8)))
+			continue;
 
-			if (IS_INDEXED_LOAD(insn)) {
-				ulong offset = 0;
+		/* compute element address */
+		ulong addr = base + vstart * stride;
 
-				get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
-				addr = base + offset;
-			}
+		if (IS_INDEXED_LOAD(insn)) {
+			ulong offset = 0;
+
+			get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
+			addr = base + offset;
+		}
 
-			csr_write(CSR_VSTART, vstart);
-
-			/* obtain load data from memory */
-			for (ulong seg = 0; seg < nf; seg++) {
-				for (ulong i = 0; i < len; i++) {
-					bytes[seg * len + i] =
-						sbi_load_u8((void *)(addr + seg * len + i),
-							    &uptrap);
-
-					if (uptrap.cause) {
-						if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
-							vl = vstart;
-							break;
-						}
-						vsetvl(vl, vtype);
-						uptrap.tinst = sbi_misaligned_tinst_fixup(
-							orig_trap->tinst, uptrap.tinst, i);
-						return sbi_trap_redirect(regs, &uptrap);
+		csr_write(CSR_VSTART, vstart);
+
+		/* obtain load data from memory */
+		for (ulong seg = 0; seg < nf; seg++) {
+			for (ulong i = 0; i < len; i++) {
+				bytes[seg * len + i] =
+					sbi_load_u8((void *)(addr + seg * len + i),
+						    &uptrap);
+
+				if (uptrap.cause) {
+					if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
+						vl = vstart;
+						break;
 					}
+					vsetvl(vl, vtype);
+					uptrap.tinst = sbi_misaligned_tinst_fixup(
+						orig_trap->tinst, uptrap.tinst, i);
+					return sbi_trap_redirect(regs, &uptrap);
 				}
 			}
-
-			/* write load data to regfile */
-			for (ulong seg = 0; seg < nf; seg++)
-				set_vreg(vlenb, vd + seg * emul, vstart * len,
-					 len, &bytes[seg * len]);
 		}
+
+		/* write load data to regfile */
+		for (ulong seg = 0; seg < nf; seg++)
+			set_vreg(vlenb, vd + seg * emul, vstart * len,
+				 len, &bytes[seg * len]);
 	} while (++vstart < vl);
 
 	/* restore clobbered vl/vtype */
@@ -288,35 +290,36 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 		get_vreg(vlenb, 0, 0, vlenb, mask);
 
 	do {
-		if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
-			/* compute element address */
-			ulong addr = base + vstart * stride;
+		if (masked && (~mask[vstart / 8] & BIT(vstart % 8)))
+			continue;
 
-			if (IS_INDEXED_STORE(insn)) {
-				ulong offset = 0;
+		/* compute element address */
+		ulong addr = base + vstart * stride;
 
-				get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
-				addr = base + offset;
-			}
+		if (IS_INDEXED_STORE(insn)) {
+			ulong offset = 0;
 
-			/* obtain store data from regfile */
-			for (ulong seg = 0; seg < nf; seg++)
-				get_vreg(vlenb, vd + seg * emul, vstart * len,
-					 len, &bytes[seg * len]);
-
-			csr_write(CSR_VSTART, vstart);
-
-			/* write store data to memory */
-			for (ulong seg = 0; seg < nf; seg++) {
-				for (ulong i = 0; i < len; i++) {
-					sbi_store_u8((void *)(addr + seg * len + i),
-						     bytes[seg * len + i], &uptrap);
-					if (uptrap.cause) {
-						vsetvl(vl, vtype);
-						uptrap.tinst = sbi_misaligned_tinst_fixup(
-							orig_trap->tinst, uptrap.tinst, i);
-						return sbi_trap_redirect(regs, &uptrap);
-					}
+			get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
+			addr = base + offset;
+		}
+
+		/* obtain store data from regfile */
+		for (ulong seg = 0; seg < nf; seg++)
+			get_vreg(vlenb, vd + seg * emul, vstart * len,
+				 len, &bytes[seg * len]);
+
+		csr_write(CSR_VSTART, vstart);
+
+		/* write store data to memory */
+		for (ulong seg = 0; seg < nf; seg++) {
+			for (ulong i = 0; i < len; i++) {
+				sbi_store_u8((void *)(addr + seg * len + i),
+					     bytes[seg * len + i], &uptrap);
+				if (uptrap.cause) {
+					vsetvl(vl, vtype);
+					uptrap.tinst = sbi_misaligned_tinst_fixup(
+						orig_trap->tinst, uptrap.tinst, i);
+					return sbi_trap_redirect(regs, &uptrap);
 				}
 			}
 		}
-- 
2.34.1


-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 2/4] lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup
  2026-06-09  6:00 [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Bo Gan
  2026-06-09  6:00 ` [PATCH v2 1/4] lib: sbi: cosmetic changes to reduce indentation Bo Gan
@ 2026-06-09  6:00 ` Bo Gan
  2026-06-09  6:00 ` [PATCH v2 3/4] lib: sbi: Add variable-length unprivilege access functions Bo Gan
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Bo Gan @ 2026-06-09  6:00 UTC (permalink / raw)
  To: opensbi, wangruikang, dramforever, andrew.jones; +Cc: cleger, pjw, asrinivasan

The load/store address offset between the uptrap and the orig_trap
can be derived by orig_trap->tval - uptrap->tval, thus refactor
the function prototype for simplicity.

For vector load, sbi_misaligned_v_tinst_fixup is introduced. There's
no transformed instruction for vector load/store, so null out tinst
if the fault is not a guest-page fault.

Signed-off-by: Bo Gan <ganboing@gmail.com>
---
 include/sbi/sbi_trap_ldst.h |  3 ---
 lib/sbi/sbi_trap_ldst.c     | 46 +++++++++++++++++++++++++++----------
 lib/sbi/sbi_trap_v_ldst.c   | 31 ++++++++++++++++++++-----
 3 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/include/sbi/sbi_trap_ldst.h b/include/sbi/sbi_trap_ldst.h
index 33c348c5..30228d24 100644
--- a/include/sbi/sbi_trap_ldst.h
+++ b/include/sbi/sbi_trap_ldst.h
@@ -28,9 +28,6 @@ int sbi_load_access_handler(struct sbi_trap_context *tcntx);
 
 int sbi_store_access_handler(struct sbi_trap_context *tcntx);
 
-ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
-				 ulong addr_offset);
-
 int sbi_misaligned_v_ld_emulator(ulong insn,
 				 struct sbi_trap_context *tcntx);
 
diff --git a/lib/sbi/sbi_trap_ldst.c b/lib/sbi/sbi_trap_ldst.c
index c1392251..5f7de662 100644
--- a/lib/sbi/sbi_trap_ldst.c
+++ b/lib/sbi/sbi_trap_ldst.c
@@ -32,16 +32,40 @@ typedef int (*sbi_trap_st_emulator)(ulong insn, int wlen, ulong waddr,
 				    union sbi_ldst_data in_val,
 				    struct sbi_trap_context *tcntx);
 
-ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
-					ulong addr_offset)
+/**
+ * Handling of misaligned fault is done by a collection of smaller, but
+ * aligned load/store(s). Another fault (load/store, page fault...) can
+ * arise from any of them, then the handling gets aborted. We must fixup
+ * the tinst to pretend the fault was rised from the original insn.
+ * Specifically, fixup the offset field using the tval diff between the
+ * new trap and the original one (if required).
+ */
+static inline void sbi_misaligned_tinst_fixup(
+			const struct sbi_trap_info *orig_trap,
+			      struct sbi_trap_info *uptrap)
 {
-	if (new_tinst == INSN_PSEUDO_VS_LOAD ||
-	    new_tinst == INSN_PSEUDO_VS_STORE)
-		return new_tinst;
-	else if (orig_tinst == 0)
-		return 0UL;
+	ulong offset = uptrap->tval - orig_trap->tval;
+
+	/*
+	 * The function is called in code path for handling a scalar
+	 * load/store misaligned fault, thus the new uptrap can't have
+	 * custom value of tinst
+	 */
+	if (uptrap->tinst == INSN_PSEUDO_VS_LOAD ||
+	    uptrap->tinst == INSN_PSEUDO_VS_STORE)
+		/* Use uptrap as-is for guest-page faults */
+		return;
+	/*
+	 * Only fixup if orig tinst is valid. Otherwise, discard the
+	 * new tinst to be on the safe side. Never use new tinst as-is!
+	 * It's load/store width surely mismatches the original width.
+	 * For vector, discard it regardless. It doesn't make sense to
+	 * have a transformed tinst
+	 */
+	else if (orig_trap->tinst == 0)
+		uptrap->tinst = 0;
 	else
-		return orig_tinst | (addr_offset << SH_RS1);
+		uptrap->tinst = orig_trap->tinst | (offset << SH_RS1);
 }
 
 static inline bool sbi_trap_tinst_valid(ulong tinst)
@@ -464,8 +488,7 @@ static int sbi_misaligned_ld_emulator(ulong insn, int rlen, ulong addr,
 		out_val->data_bytes[i] =
 			sbi_load_u8((void *)(addr + i), &uptrap);
 		if (uptrap.cause) {
-			uptrap.tinst = sbi_misaligned_tinst_fixup(
-				orig_trap->tinst, uptrap.tinst, i);
+			sbi_misaligned_tinst_fixup(orig_trap, &uptrap);
 			return sbi_trap_redirect(regs, &uptrap);
 		}
 	}
@@ -501,8 +524,7 @@ static int sbi_misaligned_st_emulator(ulong insn, int wlen, ulong addr,
 		sbi_store_u8((void *)(addr + i),
 			     in_val.data_bytes[i], &uptrap);
 		if (uptrap.cause) {
-			uptrap.tinst = sbi_misaligned_tinst_fixup(
-				orig_trap->tinst, uptrap.tinst, i);
+			sbi_misaligned_tinst_fixup(orig_trap, &uptrap);
 			return sbi_trap_redirect(regs, &uptrap);
 		}
 	}
diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
index 7d2e1409..e16e3def 100644
--- a/lib/sbi/sbi_trap_v_ldst.c
+++ b/lib/sbi/sbi_trap_v_ldst.c
@@ -138,9 +138,31 @@ static inline void vsetvl(ulong vl, ulong vtype)
 			:: "r" (vl), "r" (vtype));
 }
 
+/**
+ * Handling of misaligned fault is done by a collection of smaller, but
+ * aligned load/store(s). Another fault (load/store, page fault...) can
+ * arise from any of them, then the handling gets aborted. We must fixup
+ * the tinst to pretend the fault was rised from the original insn. For
+ * vector insn, simply null out tinst if it's not a guest-page fault, as
+ * there's no transformed insn for vector load/store
+ */
+static inline void sbi_misaligned_v_tinst_fixup(struct sbi_trap_info *uptrap)
+{
+	/*
+	 * The function is called in code path for handling a vector
+	 * load/store misaligned fault, thus the new uptrap can't have
+	 * custom value of tinst
+	 */
+	if (uptrap->tinst == INSN_PSEUDO_VS_LOAD ||
+	    uptrap->tinst == INSN_PSEUDO_VS_STORE)
+		/* Use uptrap as-is for guest-page faults */
+		return;
+
+	uptrap->tinst = 0;
+}
+
 int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 {
-	const struct sbi_trap_info *orig_trap = &tcntx->trap;
 	struct sbi_trap_regs *regs = &tcntx->regs;
 	struct sbi_trap_info uptrap;
 	ulong vl = csr_read(CSR_VL);
@@ -218,8 +240,7 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 						break;
 					}
 					vsetvl(vl, vtype);
-					uptrap.tinst = sbi_misaligned_tinst_fixup(
-						orig_trap->tinst, uptrap.tinst, i);
+					sbi_misaligned_v_tinst_fixup(&uptrap);
 					return sbi_trap_redirect(regs, &uptrap);
 				}
 			}
@@ -240,7 +261,6 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 
 int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 {
-	const struct sbi_trap_info *orig_trap = &tcntx->trap;
 	struct sbi_trap_regs *regs = &tcntx->regs;
 	struct sbi_trap_info uptrap;
 	ulong vl = csr_read(CSR_VL);
@@ -317,8 +337,7 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 					     bytes[seg * len + i], &uptrap);
 				if (uptrap.cause) {
 					vsetvl(vl, vtype);
-					uptrap.tinst = sbi_misaligned_tinst_fixup(
-						orig_trap->tinst, uptrap.tinst, i);
+					sbi_misaligned_v_tinst_fixup(&uptrap);
 					return sbi_trap_redirect(regs, &uptrap);
 				}
 			}
-- 
2.34.1


-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 3/4] lib: sbi: Add variable-length unprivilege access functions
  2026-06-09  6:00 [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Bo Gan
  2026-06-09  6:00 ` [PATCH v2 1/4] lib: sbi: cosmetic changes to reduce indentation Bo Gan
  2026-06-09  6:00 ` [PATCH v2 2/4] lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup Bo Gan
@ 2026-06-09  6:00 ` Bo Gan
  2026-06-09  6:00 ` [PATCH v2 4/4] lib: sbi: Rework misaligned vector load/store Bo Gan
  2026-06-09 22:02 ` [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Anirudh Srinivasan
  4 siblings, 0 replies; 8+ messages in thread
From: Bo Gan @ 2026-06-09  6:00 UTC (permalink / raw)
  To: opensbi, wangruikang, dramforever, andrew.jones; +Cc: cleger, pjw, asrinivasan

sbi_load/store_loop read/write variable-length buffer unprivileged.
Both function use the widest aligned 8/4/2/1 byte load/stores in each
loop to reduce the total number of iterations.

Also switch the scalar/vector misaligned handlers to make use of such
functions to simplify code.

Miscellaneous: remove the unnecessary [taddr] in inline assembly

Signed-off-by: Bo Gan <ganboing@gmail.com>
---
 include/sbi/sbi_unpriv.h  | 16 +++++++
 lib/sbi/sbi_trap_ldst.c   | 24 ++++-------
 lib/sbi/sbi_trap_v_ldst.c | 38 ++++++++---------
 lib/sbi/sbi_unpriv.c      | 88 ++++++++++++++++++++++++++++++++++++---
 4 files changed, 123 insertions(+), 43 deletions(-)

diff --git a/include/sbi/sbi_unpriv.h b/include/sbi/sbi_unpriv.h
index 8cbd3de0..226cf175 100644
--- a/include/sbi/sbi_unpriv.h
+++ b/include/sbi/sbi_unpriv.h
@@ -15,6 +15,16 @@
 struct sbi_scratch;
 struct sbi_trap_info;
 
+union sbi_unpriv_data {
+	u8 b;
+	u16 h;
+	u32 w;
+#if __riscv_xlen == 64
+	u64 d;
+#endif
+	u8 bytes[__riscv_xlen / 8];
+};
+
 #define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type)           \
 	type sbi_load_##type(const type *addr,             \
 			     struct sbi_trap_info *trap);
@@ -36,6 +46,12 @@ DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64)
 DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64)
 DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong)
 
+void sbi_load_loop(u8 *buffer, ulong addr, ulong len,
+		   struct sbi_trap_info *trap);
+
+void sbi_store_loop(u8 *buffer, ulong addr, ulong len,
+		    struct sbi_trap_info *trap);
+
 ulong sbi_get_insn(ulong mepc, struct sbi_trap_info *trap);
 
 #endif
diff --git a/lib/sbi/sbi_trap_ldst.c b/lib/sbi/sbi_trap_ldst.c
index 5f7de662..e726520f 100644
--- a/lib/sbi/sbi_trap_ldst.c
+++ b/lib/sbi/sbi_trap_ldst.c
@@ -471,7 +471,6 @@ static int sbi_misaligned_ld_emulator(ulong insn, int rlen, ulong addr,
 	const struct sbi_trap_info *orig_trap = &tcntx->trap;
 	struct sbi_trap_regs *regs = &tcntx->regs;
 	struct sbi_trap_info uptrap;
-	int i;
 
 	if (!rlen) {
 		if (IS_VECTOR_LOAD_STORE(insn))
@@ -484,13 +483,10 @@ static int sbi_misaligned_ld_emulator(ulong insn, int rlen, ulong addr,
 	if (addr != orig_trap->tval)
 		return SBI_EFAIL;
 
-	for (i = 0; i < rlen; i++) {
-		out_val->data_bytes[i] =
-			sbi_load_u8((void *)(addr + i), &uptrap);
-		if (uptrap.cause) {
-			sbi_misaligned_tinst_fixup(orig_trap, &uptrap);
-			return sbi_trap_redirect(regs, &uptrap);
-		}
+	sbi_load_loop(out_val->data_bytes, addr, rlen, &uptrap);
+	if (uptrap.cause) {
+		sbi_misaligned_tinst_fixup(orig_trap, &uptrap);
+		return sbi_trap_redirect(regs, &uptrap);
 	}
 	return rlen;
 }
@@ -507,7 +503,6 @@ static int sbi_misaligned_st_emulator(ulong insn, int wlen, ulong addr,
 	const struct sbi_trap_info *orig_trap = &tcntx->trap;
 	struct sbi_trap_regs *regs = &tcntx->regs;
 	struct sbi_trap_info uptrap;
-	int i;
 
 	if (!wlen) {
 		if (IS_VECTOR_LOAD_STORE(insn))
@@ -520,13 +515,10 @@ static int sbi_misaligned_st_emulator(ulong insn, int wlen, ulong addr,
 	if (addr != orig_trap->tval)
 		return SBI_EFAIL;
 
-	for (i = 0; i < wlen; i++) {
-		sbi_store_u8((void *)(addr + i),
-			     in_val.data_bytes[i], &uptrap);
-		if (uptrap.cause) {
-			sbi_misaligned_tinst_fixup(orig_trap, &uptrap);
-			return sbi_trap_redirect(regs, &uptrap);
-		}
+	sbi_store_loop(in_val.data_bytes, addr, wlen, &uptrap);
+	if (uptrap.cause) {
+		sbi_misaligned_tinst_fixup(orig_trap, &uptrap);
+		return sbi_trap_redirect(regs, &uptrap);
 	}
 	return wlen;
 }
diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
index e16e3def..02f7d6cc 100644
--- a/lib/sbi/sbi_trap_v_ldst.c
+++ b/lib/sbi/sbi_trap_v_ldst.c
@@ -229,20 +229,17 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 
 		/* obtain load data from memory */
 		for (ulong seg = 0; seg < nf; seg++) {
-			for (ulong i = 0; i < len; i++) {
-				bytes[seg * len + i] =
-					sbi_load_u8((void *)(addr + seg * len + i),
-						    &uptrap);
-
-				if (uptrap.cause) {
-					if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
-						vl = vstart;
-						break;
-					}
-					vsetvl(vl, vtype);
-					sbi_misaligned_v_tinst_fixup(&uptrap);
-					return sbi_trap_redirect(regs, &uptrap);
+			sbi_load_loop(bytes + seg * len,
+				       addr + seg * len, len, &uptrap);
+
+			if (uptrap.cause) {
+				if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
+					vl = vstart;
+					break;
 				}
+				vsetvl(vl, vtype);
+				sbi_misaligned_v_tinst_fixup(&uptrap);
+				return sbi_trap_redirect(regs, &uptrap);
 			}
 		}
 
@@ -332,14 +329,13 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 
 		/* write store data to memory */
 		for (ulong seg = 0; seg < nf; seg++) {
-			for (ulong i = 0; i < len; i++) {
-				sbi_store_u8((void *)(addr + seg * len + i),
-					     bytes[seg * len + i], &uptrap);
-				if (uptrap.cause) {
-					vsetvl(vl, vtype);
-					sbi_misaligned_v_tinst_fixup(&uptrap);
-					return sbi_trap_redirect(regs, &uptrap);
-				}
+			sbi_store_loop(bytes + seg * len,
+					addr + seg * len, len, &uptrap);
+
+			if (uptrap.cause) {
+				vsetvl(vl, vtype);
+				sbi_misaligned_v_tinst_fixup(&uptrap);
+				return sbi_trap_redirect(regs, &uptrap);
 			}
 		}
 	} while (++vstart < vl);
diff --git a/lib/sbi/sbi_unpriv.c b/lib/sbi/sbi_unpriv.c
index f9bbec59..14b8f3cb 100644
--- a/lib/sbi/sbi_unpriv.c
+++ b/lib/sbi/sbi_unpriv.c
@@ -11,6 +11,7 @@
 #include <sbi/sbi_bitops.h>
 #include <sbi/sbi_hart.h>
 #include <sbi/sbi_scratch.h>
+#include <sbi/sbi_string.h>
 #include <sbi/sbi_trap.h>
 #include <sbi/sbi_unpriv.h>
 
@@ -22,13 +23,12 @@
 	type sbi_load_##type(const type *addr,                                \
 			     struct sbi_trap_info *trap)                      \
 	{                                                                     \
-		register ulong tinfo asm("a3");                               \
+		register ulong tinfo asm("a3") = (ulong)trap;                 \
 		register ulong mstatus = 0;                                   \
 		register ulong mtvec = (ulong)sbi_hart_expected_trap;         \
 		type ret = 0;                                                 \
 		trap->cause = 0;                                              \
 		asm volatile(                                                 \
-			"add %[tinfo], %[taddr], zero\n"                      \
 			"csrrw %[mtvec], " STR(CSR_MTVEC) ", %[mtvec]\n"      \
 			"csrrs %[mstatus], " STR(CSR_MSTATUS) ", %[mprv]\n"   \
 			".option push\n"                                      \
@@ -39,8 +39,7 @@
 			"csrw " STR(CSR_MTVEC) ", %[mtvec]"                   \
 		    : [mstatus] "+&r"(mstatus), [mtvec] "+&r"(mtvec),         \
 		      [tinfo] "+&r"(tinfo), [ret] "=&r"(ret)                  \
-		    : [addr] "m"(*addr), [mprv] "r"(MSTATUS_MPRV),            \
-		      [taddr] "r"((ulong)trap)                                \
+		    : [addr] "m"(*addr), [mprv] "r"(MSTATUS_MPRV)             \
 		    : "a4", "memory");                                        \
 		return ret;                                                   \
 	}
@@ -54,7 +53,6 @@
 		register ulong mtvec = (ulong)sbi_hart_expected_trap;         \
 		trap->cause = 0;                                              \
 		asm volatile(                                                 \
-			"add %[tinfo], %[taddr], zero\n"                      \
 			"csrrw %[mtvec], " STR(CSR_MTVEC) ", %[mtvec]\n"      \
 			"csrrs %[mstatus], " STR(CSR_MSTATUS) ", %[mprv]\n"   \
 			".option push\n"                                      \
@@ -66,7 +64,7 @@
 		    : [mstatus] "+&r"(mstatus), [mtvec] "+&r"(mtvec),         \
 		      [tinfo] "+&r"(tinfo)                                    \
 		    : [addr] "m"(*addr), [mprv] "r"(MSTATUS_MPRV),            \
-		      [val] "r"(val), [taddr] "r"((ulong)trap)                \
+		      [val] "r"(val)                                          \
 		    : "a4", "memory");                                        \
 	}
 
@@ -116,6 +114,84 @@ void sbi_store_u64(u64 *addr, u64 val,
 # error "Unexpected __riscv_xlen"
 #endif
 
+void sbi_load_loop(u8 *buffer, ulong addr, ulong len,
+		   struct sbi_trap_info *trap)
+{
+	union sbi_unpriv_data data;
+
+	trap->cause = 0;
+	while (len) {
+		unsigned int width = __riscv_xlen / 8;
+		void *ptr = (void*)addr;
+
+		while (len < width || (addr & (width - 1)))
+			width /= 2;
+
+		switch (width) {
+		case 1:
+			data.b = sbi_load_u8(ptr, trap);
+			break;
+		case 2:
+			data.h = sbi_load_u16(ptr, trap);
+			break;
+		case 4:
+			data.w = sbi_load_u32(ptr, trap);
+			break;
+#if __riscv_xlen == 64
+		case 8:
+			data.d = sbi_load_u64(ptr, trap);
+			break;
+#endif
+		}
+		if (trap->cause)
+			return;
+
+		sbi_memcpy(buffer, data.bytes, width);
+		len -= width;
+		addr += width;
+		buffer += width;
+	}
+}
+
+void sbi_store_loop(u8 *buffer, ulong addr, ulong len,
+		    struct sbi_trap_info *trap)
+{
+	union sbi_unpriv_data data;
+
+	trap->cause = 0;
+	while (len) {
+		unsigned int width = __riscv_xlen / 8;
+		void *ptr = (void*)addr;
+
+		while (len < width || (addr & (width - 1)))
+			width /= 2;
+
+		sbi_memcpy(data.bytes, buffer, width);
+		switch (width) {
+		case 1:
+			sbi_store_u8(ptr, data.b, trap);
+			break;
+		case 2:
+			sbi_store_u16(ptr, data.h, trap);
+			break;
+		case 4:
+			sbi_store_u32(ptr, data.w, trap);
+			break;
+#if __riscv_xlen == 64
+		case 8:
+			sbi_store_u64(ptr, data.d, trap);
+			break;
+#endif
+		}
+		if (trap->cause)
+			return;
+
+		len -= width;
+		addr += width;
+		buffer += width;
+	}
+}
+
 ulong sbi_get_insn(ulong mepc, struct sbi_trap_info *trap)
 {
 	register ulong tinfo asm("a3");
-- 
2.34.1


-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 4/4] lib: sbi: Rework misaligned vector load/store
  2026-06-09  6:00 [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Bo Gan
                   ` (2 preceding siblings ...)
  2026-06-09  6:00 ` [PATCH v2 3/4] lib: sbi: Add variable-length unprivilege access functions Bo Gan
@ 2026-06-09  6:00 ` Bo Gan
  2026-06-09 22:02 ` [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Anirudh Srinivasan
  4 siblings, 0 replies; 8+ messages in thread
From: Bo Gan @ 2026-06-09  6:00 UTC (permalink / raw)
  To: opensbi, wangruikang, dramforever, andrew.jones; +Cc: cleger, pjw, asrinivasan

Fix the following issues with misaligned vector load/store:

a. Stack overflow: the mask[VLEN_MAX / 8] variable consumes 8K stack
space, given VLEN_MAX=65536, overflowing the default-sized stack.
There's no need to fetch the whole mask in one go, instead, make it
on-demand. Use a 128-byte mask as local buffer to hold the sliding
window of mask. For rvv load, this is allowed -- from the spec:

  "The destination vector register group for a masked vector
   instruction cannot overlap the source mask register (v0),
   unless the destination vector register is being written with
   a mask value (e.g., compares) or the scalar result of a reduction"

We don't need to worry about the mask getting overwritten.

b. Maintain the value of vstart upon abort (uptrap) to avoid duplicate
work. After fault resolution, the instruction can restart from the
faulting vstart. For Fault-Only-First loads, reset vstart to 0, as
previously done so, to conform to spec.

c. Explicitly set VS dirty in VSSTATUS with SET_VS_DIRTY() if faulting
from V=1, and if any vector register, including vstart/vl/vtype, gets
changed in the handler. It can add 1 unnecessary op to set VS dirty
in M/SSTATUS (not VSSTATUS), where the HW already did, but for code
simplicity, do it anyway. The overhead should be negligible.

Signed-off-by: Bo Gan <ganboing@gmail.com>
---
 include/sbi/sbi_vector.h  |   6 +++
 lib/sbi/sbi_trap_v_ldst.c | 103 +++++++++++++++++++++++++-------------
 2 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/include/sbi/sbi_vector.h b/include/sbi/sbi_vector.h
index f00184f0..c14f3174 100644
--- a/include/sbi/sbi_vector.h
+++ b/include/sbi/sbi_vector.h
@@ -20,6 +20,12 @@ struct sbi_vector_context {
 	uint8_t vregs[];
 };
 
+#define SET_VS_DIRTY(regs) do {				\
+	if (sbi_regs_from_virt(regs))			\
+		csr_set(CSR_VSSTATUS, MSTATUS_VS);	\
+	regs->mstatus |= MSTATUS_VS;			\
+} while(0)
+
 #ifdef OPENSBI_CC_SUPPORT_VECTOR
 void sbi_vector_save(struct sbi_vector_context *dst);
 void sbi_vector_restore(const struct sbi_vector_context *src);
diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
index 02f7d6cc..0f29dcf9 100644
--- a/lib/sbi/sbi_trap_v_ldst.c
+++ b/lib/sbi/sbi_trap_v_ldst.c
@@ -16,11 +16,11 @@
 #include <sbi/sbi_trap_ldst.h>
 #include <sbi/sbi_trap.h>
 #include <sbi/sbi_unpriv.h>
-#include <sbi/sbi_trap.h>
+#include <sbi/sbi_vector.h>
 
 #ifdef OPENSBI_CC_SUPPORT_VECTOR
 
-#define VLEN_MAX 65536
+#define MASK_BUFFLEN 1024
 
 static inline void set_vreg(ulong vlenb, ulong which,
 			    ulong pos, ulong size, const uint8_t *bytes)
@@ -168,7 +168,7 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 	ulong vl = csr_read(CSR_VL);
 	ulong vtype = csr_read(CSR_VTYPE);
 	ulong vlenb = csr_read(CSR_VLENB);
-	ulong vstart = csr_read(CSR_VSTART);
+	ulong vstart = csr_read(CSR_VSTART), orig_vstart = vstart;
 	ulong base = GET_RS1(insn, regs);
 	ulong stride = GET_RS2(insn, regs);
 	ulong vd = GET_VD(insn);
@@ -178,8 +178,9 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 	ulong vlmul = GET_VLMUL(vtype);
 	bool illegal = GET_MEW(insn);
 	bool masked = IS_MASKED(insn);
-	uint8_t mask[VLEN_MAX / 8];
+	uint8_t mask[MASK_BUFFLEN / 8];
 	uint8_t bytes[8 * sizeof(uint64_t)];
+	ulong mask_len = MASK_BUFFLEN < vlenb * 8 ? MASK_BUFFLEN : vlenb * 8;
 	ulong len = GET_LEN(view);
 	ulong nf = GET_NF(insn);
 	ulong vemul = GET_VEMUL(vlmul, view, vsew);
@@ -200,7 +201,7 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 		stride = nf * len;
 	}
 
-	if (illegal || vlenb > VLEN_MAX / 8) {
+	if (illegal) {
 		struct sbi_trap_info trap = {
 			uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
 			uptrap.tval = insn,
@@ -208,12 +209,16 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 		return sbi_trap_redirect(regs, &trap);
 	}
 
-	if (masked)
-		get_vreg(vlenb, 0, 0, vlenb, mask);
-
 	do {
-		if (masked && (~mask[vstart / 8] & BIT(vstart % 8)))
-			continue;
+		if (masked) {
+			if (vstart == orig_vstart || vstart % mask_len == 0)
+				/* Fetch a mask_len chunk of mask */
+				get_vreg(vlenb, 0, vstart / mask_len * mask_len,
+					 mask_len, mask);
+
+			if (~mask[vstart % mask_len / 8] & BIT(vstart % 8))
+				continue;
+		}
 
 		/* compute element address */
 		ulong addr = base + vstart * stride;
@@ -232,15 +237,21 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 			sbi_load_loop(bytes + seg * len,
 				       addr + seg * len, len, &uptrap);
 
-			if (uptrap.cause) {
-				if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
-					vl = vstart;
-					break;
-				}
-				vsetvl(vl, vtype);
-				sbi_misaligned_v_tinst_fixup(&uptrap);
-				return sbi_trap_redirect(regs, &uptrap);
+			if (!uptrap.cause)
+				continue;
+
+			if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
+				vl = vstart;
+				goto done;
 			}
+
+			vsetvl(vl, vtype);
+			csr_write(CSR_VSTART, vstart);
+			/* Don't forget to set dirty if vstart has changed */
+			if (vstart != orig_vstart)
+				SET_VS_DIRTY(regs);
+			sbi_misaligned_v_tinst_fixup(&uptrap);
+			return sbi_trap_redirect(regs, &uptrap);
 		}
 
 		/* write load data to regfile */
@@ -249,8 +260,15 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 				 len, &bytes[seg * len]);
 	} while (++vstart < vl);
 
+done:
 	/* restore clobbered vl/vtype */
-	vsetvl(vl, vtype);
+	vsetvl(vl, vtype); // VSTART resets to 0
+
+	/*
+	 * At least 1 element is processed, or vl is changed above in
+	 * the FAULT_ONLY_FIRST_LOAD path, thus set dirty.
+	 */
+	SET_VS_DIRTY(regs);
 
 	/* Return a >0 value for the caller to advance mepc */
 	return 1;
@@ -263,7 +281,7 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 	ulong vl = csr_read(CSR_VL);
 	ulong vtype = csr_read(CSR_VTYPE);
 	ulong vlenb = csr_read(CSR_VLENB);
-	ulong vstart = csr_read(CSR_VSTART);
+	ulong vstart = csr_read(CSR_VSTART), orig_vstart = vstart;
 	ulong base = GET_RS1(insn, regs);
 	ulong stride = GET_RS2(insn, regs);
 	ulong vd = GET_VD(insn);
@@ -273,8 +291,9 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 	ulong vlmul = GET_VLMUL(vtype);
 	bool illegal = GET_MEW(insn);
 	bool masked = IS_MASKED(insn);
-	uint8_t mask[VLEN_MAX / 8];
+	uint8_t mask[MASK_BUFFLEN / 8];
 	uint8_t bytes[8 * sizeof(uint64_t)];
+	ulong mask_len = MASK_BUFFLEN < vlenb * 8 ? MASK_BUFFLEN : vlenb * 8;
 	ulong len = GET_LEN(view);
 	ulong nf = GET_NF(insn);
 	ulong vemul = GET_VEMUL(vlmul, view, vsew);
@@ -295,7 +314,7 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 		stride = nf * len;
 	}
 
-	if (illegal || vlenb > VLEN_MAX / 8) {
+	if (illegal) {
 		struct sbi_trap_info trap = {
 			uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
 			uptrap.tval = insn,
@@ -303,12 +322,16 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 		return sbi_trap_redirect(regs, &trap);
 	}
 
-	if (masked)
-		get_vreg(vlenb, 0, 0, vlenb, mask);
-
 	do {
-		if (masked && (~mask[vstart / 8] & BIT(vstart % 8)))
-			continue;
+		if (masked) {
+			if (vstart == orig_vstart || vstart % mask_len == 0)
+				/* Fetch a mask_len chunk of mask */
+				get_vreg(vlenb, 0, vstart / mask_len * mask_len,
+					 mask_len, mask);
+
+			if (~mask[vstart % mask_len / 8] & BIT(vstart % 8))
+				continue;
+		}
 
 		/* compute element address */
 		ulong addr = base + vstart * stride;
@@ -325,23 +348,33 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 			get_vreg(vlenb, vd + seg * emul, vstart * len,
 				 len, &bytes[seg * len]);
 
-		csr_write(CSR_VSTART, vstart);
-
 		/* write store data to memory */
 		for (ulong seg = 0; seg < nf; seg++) {
 			sbi_store_loop(bytes + seg * len,
 					addr + seg * len, len, &uptrap);
 
-			if (uptrap.cause) {
-				vsetvl(vl, vtype);
-				sbi_misaligned_v_tinst_fixup(&uptrap);
-				return sbi_trap_redirect(regs, &uptrap);
-			}
+			if (!uptrap.cause)
+				continue;
+
+			vsetvl(vl, vtype);
+			csr_write(CSR_VSTART, vstart);
+			/* Don't forget to set dirty if vstart has changed */
+			if (vstart != orig_vstart)
+				SET_VS_DIRTY(regs);
+			sbi_misaligned_v_tinst_fixup(&uptrap);
+			return sbi_trap_redirect(regs, &uptrap);
 		}
 	} while (++vstart < vl);
 
 	/* restore clobbered vl/vtype */
-	vsetvl(vl, vtype);
+	vsetvl(vl, vtype); // VSTART resets to 0
+
+	/*
+	 * No need to set dirty for memory store, but as VSTART resets to
+	 * 0 above, need to set dirty if it's originally not 0.
+	 */
+	if (orig_vstart != 0)
+		SET_VS_DIRTY(regs);
 
 	/* Return a >0 value for the caller to advance mepc */
 	return 1;
-- 
2.34.1


-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 0/4] Fixes for vector misaligned load/store handlers
  2026-06-09  6:00 [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Bo Gan
                   ` (3 preceding siblings ...)
  2026-06-09  6:00 ` [PATCH v2 4/4] lib: sbi: Rework misaligned vector load/store Bo Gan
@ 2026-06-09 22:02 ` Anirudh Srinivasan
  2026-06-09 23:54   ` Bo Gan
  4 siblings, 1 reply; 8+ messages in thread
From: Anirudh Srinivasan @ 2026-06-09 22:02 UTC (permalink / raw)
  To: Bo Gan; +Cc: opensbi, wangruikang, dramforever, andrew.jones, cleger, pjw

Hi Bo,

On Tue, Jun 9, 2026 at 1:02 AM Bo Gan <ganboing@gmail.com> wrote:
>
> Re-visit the vector misaligned load/store handlers and fix:
>
> a. Avoid stack overflow by using a small sliding mask[] buffer,
>    thus optimizes stack usage *IMPORTANT* (correctness). There's no-
>    longer a need to have a pre-defined vlen maximum, and worry about
>    whether the stack can hold the mask[] variable.
>
> b. Maintain the value of vstart when redirecting uptrap. (optmization)
>    Avoids doing duplicate work when the instruction gets restarted.
>
> c. Explicitly set VS dirty in (V)SSTATUS. (correctness), VS in
>    VSSTATUS must be set dirty if coming from V=1.
>
> d. Zero out tinst in uptrap if not guest-page fault (correctness).
>
> This is a follow up patch to [1] and should be applied on top.
> [1] https://lore.kernel.org/opensbi/CAEev2e_Vg1mMP4mOKanFX_EGeUzpczRcWW++vBBuN1CfyM0BJw@mail.gmail.com/T/#t
> ---
> v2: Fix the wrong PATCH 4/4 generated in v1.

Testing on Tenstorrent Blackhole with Sifive X280 cores.

After adding some logging like this, I'm still able to break the boot
(like I'd reported on your previous patch). Full logs here
https://gist.github.com/asrinivasanTT/120646cbb7194e7b3505428ebefbdb30

diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
index 0f29dcf9..0f73c339 100644
--- a/lib/sbi/sbi_trap_v_ldst.c
+++ b/lib/sbi/sbi_trap_v_ldst.c
@@ -17,6 +17,7 @@
 #include <sbi/sbi_trap.h>
 #include <sbi/sbi_unpriv.h>
 #include <sbi/sbi_vector.h>
+#include <sbi/sbi_console.h>

 #ifdef OPENSBI_CC_SUPPORT_VECTOR

@@ -163,6 +164,8 @@ static inline void
sbi_misaligned_v_tinst_fixup(struct sbi_trap_info *uptrap)

 int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
 {
+       sbi_printf("%s: insn=0x%lx mepc=0x%lx mtval=0x%lx\n",
+                  __func__, insn, tcntx->regs.mepc, tcntx->trap.tval);
        struct sbi_trap_regs *regs = &tcntx->regs;
        struct sbi_trap_info uptrap;
        ulong vl = csr_read(CSR_VL);
@@ -276,6 +279,8 @@ done:

 int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
 {
+       sbi_printf("%s: insn=0x%lx mepc=0x%lx mtval=0x%lx\n",
+               __func__, insn, tcntx->regs.mepc, tcntx->trap.tval);
        struct sbi_trap_regs *regs = &tcntx->regs;
        struct sbi_trap_info uptrap;
        ulong vl = csr_read(CSR_VL);

Is this expected? Are the logging prints causing the overflow?

If I don't have this logging prints added, I'm able to boot fine into
linux. I was able to do this with your last patch, so this patch
doesn't change much in that aspect.

>
> ---
> Bo Gan (4):
>   lib: sbi: cosmetic changes to reduce indentation
>   lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup
>   lib: sbi: Add variable-length unprivilege access functions
>   lib: sbi: Rework misaligned vector load/store
>
>  include/sbi/sbi_trap_ldst.h |   3 -
>  include/sbi/sbi_unpriv.h    |  16 +++
>  include/sbi/sbi_vector.h    |   6 ++
>  lib/sbi/sbi_trap_ldst.c     |  66 +++++++-----
>  lib/sbi/sbi_trap_v_ldst.c   | 201 ++++++++++++++++++++++--------------
>  lib/sbi/sbi_unpriv.c        |  88 ++++++++++++++--
>  6 files changed, 270 insertions(+), 110 deletions(-)
>
> --
> 2.34.1
>

-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 0/4] Fixes for vector misaligned load/store handlers
  2026-06-09 22:02 ` [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Anirudh Srinivasan
@ 2026-06-09 23:54   ` Bo Gan
  2026-06-09 23:59     ` Anirudh Srinivasan
  0 siblings, 1 reply; 8+ messages in thread
From: Bo Gan @ 2026-06-09 23:54 UTC (permalink / raw)
  To: Anirudh Srinivasan, Bo Gan
  Cc: opensbi, wangruikang, dramforever, andrew.jones, cleger, pjw

Hi Anirudh,

On 6/9/26 15:02, Anirudh Srinivasan wrote:
> Hi Bo,
> 
> On Tue, Jun 9, 2026 at 1:02 AM Bo Gan <ganboing@gmail.com> wrote:
>>
>> Re-visit the vector misaligned load/store handlers and fix:
>>
>> a. Avoid stack overflow by using a small sliding mask[] buffer,
>>     thus optimizes stack usage *IMPORTANT* (correctness). There's no-
>>     longer a need to have a pre-defined vlen maximum, and worry about
>>     whether the stack can hold the mask[] variable.
>>
>> b. Maintain the value of vstart when redirecting uptrap. (optmization)
>>     Avoids doing duplicate work when the instruction gets restarted.
>>
>> c. Explicitly set VS dirty in (V)SSTATUS. (correctness), VS in
>>     VSSTATUS must be set dirty if coming from V=1.
>>
>> d. Zero out tinst in uptrap if not guest-page fault (correctness).
>>
>> This is a follow up patch to [1] and should be applied on top.
>> [1] https://lore.kernel.org/opensbi/CAEev2e_Vg1mMP4mOKanFX_EGeUzpczRcWW++vBBuN1CfyM0BJw@mail.gmail.com/T/#t
>> ---
>> v2: Fix the wrong PATCH 4/4 generated in v1.
> 
> Testing on Tenstorrent Blackhole with Sifive X280 cores.
> 
> After adding some logging like this, I'm still able to break the boot
> (like I'd reported on your previous patch). Full logs here
> https://gist.github.com/asrinivasanTT/120646cbb7194e7b3505428ebefbdb30
> 
> diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
> index 0f29dcf9..0f73c339 100644
> --- a/lib/sbi/sbi_trap_v_ldst.c
> +++ b/lib/sbi/sbi_trap_v_ldst.c
> @@ -17,6 +17,7 @@
>   #include <sbi/sbi_trap.h>
>   #include <sbi/sbi_unpriv.h>
>   #include <sbi/sbi_vector.h>
> +#include <sbi/sbi_console.h>
> 
>   #ifdef OPENSBI_CC_SUPPORT_VECTOR
> 
> @@ -163,6 +164,8 @@ static inline void
> sbi_misaligned_v_tinst_fixup(struct sbi_trap_info *uptrap)
> 
>   int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
>   {
> +       sbi_printf("%s: insn=0x%lx mepc=0x%lx mtval=0x%lx\n",
> +                  __func__, insn, tcntx->regs.mepc, tcntx->trap.tval);
>          struct sbi_trap_regs *regs = &tcntx->regs;
>          struct sbi_trap_info uptrap;
>          ulong vl = csr_read(CSR_VL);
> @@ -276,6 +279,8 @@ done:
> 
>   int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
>   {
> +       sbi_printf("%s: insn=0x%lx mepc=0x%lx mtval=0x%lx\n",
> +               __func__, insn, tcntx->regs.mepc, tcntx->trap.tval);
>          struct sbi_trap_regs *regs = &tcntx->regs;
>          struct sbi_trap_info uptrap;
>          ulong vl = csr_read(CSR_VL);
> 
> Is this expected? Are the logging prints causing the overflow?
> 
> If I don't have this logging prints added, I'm able to boot fine into
> linux. I was able to do this with your last patch, so this patch
> doesn't change much in that aspect.
> 

As discussed in the IRC DM, it's most likely a Linux race condition bug.
Adding the prints delays the probe function significantly, and the page
containing the function, marked as __init, can be concurrently unmapped
and free'd, thus triggering instruction page fault, cause=0xc. Can you
confirm that removing __init (as a hack) fixes the problem? Looks like
you've tried it already and the issue seems to be gone. Can you confirm?

It definitely warrants a proper fix to upstream linux. It can't rely on
timing to avoid a crash like this.

>>
>> ---
>> Bo Gan (4):
>>    lib: sbi: cosmetic changes to reduce indentation
>>    lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup
>>    lib: sbi: Add variable-length unprivilege access functions
>>    lib: sbi: Rework misaligned vector load/store
>>
>>   include/sbi/sbi_trap_ldst.h |   3 -
>>   include/sbi/sbi_unpriv.h    |  16 +++
>>   include/sbi/sbi_vector.h    |   6 ++
>>   lib/sbi/sbi_trap_ldst.c     |  66 +++++++-----
>>   lib/sbi/sbi_trap_v_ldst.c   | 201 ++++++++++++++++++++++--------------
>>   lib/sbi/sbi_unpriv.c        |  88 ++++++++++++++--
>>   6 files changed, 270 insertions(+), 110 deletions(-)
>>
>> --
>> 2.34.1
>>

Bo


-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 0/4] Fixes for vector misaligned load/store handlers
  2026-06-09 23:54   ` Bo Gan
@ 2026-06-09 23:59     ` Anirudh Srinivasan
  0 siblings, 0 replies; 8+ messages in thread
From: Anirudh Srinivasan @ 2026-06-09 23:59 UTC (permalink / raw)
  To: Bo Gan; +Cc: opensbi, wangruikang, dramforever, andrew.jones, cleger, pjw

On Tue, Jun 9, 2026 at 6:49 PM Bo Gan <ganboing@gmail.com> wrote:
>
> Hi Anirudh,
>
> On 6/9/26 15:02, Anirudh Srinivasan wrote:
> > Hi Bo,
> >
> > On Tue, Jun 9, 2026 at 1:02 AM Bo Gan <ganboing@gmail.com> wrote:
> >>
> >> Re-visit the vector misaligned load/store handlers and fix:
> >>
> >> a. Avoid stack overflow by using a small sliding mask[] buffer,
> >>     thus optimizes stack usage *IMPORTANT* (correctness). There's no-
> >>     longer a need to have a pre-defined vlen maximum, and worry about
> >>     whether the stack can hold the mask[] variable.
> >>
> >> b. Maintain the value of vstart when redirecting uptrap. (optmization)
> >>     Avoids doing duplicate work when the instruction gets restarted.
> >>
> >> c. Explicitly set VS dirty in (V)SSTATUS. (correctness), VS in
> >>     VSSTATUS must be set dirty if coming from V=1.
> >>
> >> d. Zero out tinst in uptrap if not guest-page fault (correctness).
> >>
> >> This is a follow up patch to [1] and should be applied on top.
> >> [1] https://lore.kernel.org/opensbi/CAEev2e_Vg1mMP4mOKanFX_EGeUzpczRcWW++vBBuN1CfyM0BJw@mail.gmail.com/T/#t
> >> ---
> >> v2: Fix the wrong PATCH 4/4 generated in v1.
> >
> > Testing on Tenstorrent Blackhole with Sifive X280 cores.
> >
> > After adding some logging like this, I'm still able to break the boot
> > (like I'd reported on your previous patch). Full logs here
> > https://gist.github.com/asrinivasanTT/120646cbb7194e7b3505428ebefbdb30
> >
> > diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
> > index 0f29dcf9..0f73c339 100644
> > --- a/lib/sbi/sbi_trap_v_ldst.c
> > +++ b/lib/sbi/sbi_trap_v_ldst.c
> > @@ -17,6 +17,7 @@
> >   #include <sbi/sbi_trap.h>
> >   #include <sbi/sbi_unpriv.h>
> >   #include <sbi/sbi_vector.h>
> > +#include <sbi/sbi_console.h>
> >
> >   #ifdef OPENSBI_CC_SUPPORT_VECTOR
> >
> > @@ -163,6 +164,8 @@ static inline void
> > sbi_misaligned_v_tinst_fixup(struct sbi_trap_info *uptrap)
> >
> >   int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
> >   {
> > +       sbi_printf("%s: insn=0x%lx mepc=0x%lx mtval=0x%lx\n",
> > +                  __func__, insn, tcntx->regs.mepc, tcntx->trap.tval);
> >          struct sbi_trap_regs *regs = &tcntx->regs;
> >          struct sbi_trap_info uptrap;
> >          ulong vl = csr_read(CSR_VL);
> > @@ -276,6 +279,8 @@ done:
> >
> >   int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
> >   {
> > +       sbi_printf("%s: insn=0x%lx mepc=0x%lx mtval=0x%lx\n",
> > +               __func__, insn, tcntx->regs.mepc, tcntx->trap.tval);
> >          struct sbi_trap_regs *regs = &tcntx->regs;
> >          struct sbi_trap_info uptrap;
> >          ulong vl = csr_read(CSR_VL);
> >
> > Is this expected? Are the logging prints causing the overflow?
> >
> > If I don't have this logging prints added, I'm able to boot fine into
> > linux. I was able to do this with your last patch, so this patch
> > doesn't change much in that aspect.
> >
>
> As discussed in the IRC DM, it's most likely a Linux race condition bug.
> Adding the prints delays the probe function significantly, and the page
> containing the function, marked as __init, can be concurrently unmapped
> and free'd, thus triggering instruction page fault, cause=0xc. Can you
> confirm that removing __init (as a hack) fixes the problem? Looks like
> you've tried it already and the issue seems to be gone. Can you confirm?

Yup, removing __init fixes it. It's a bug in linux.

Tested-by: Anirudh Srinivasan <asrinivasan@oss.tenstorrent.com>


On Tue, Jun 9, 2026 at 6:49 PM Bo Gan <ganboing@gmail.com> wrote:
>
> Hi Anirudh,
>
> On 6/9/26 15:02, Anirudh Srinivasan wrote:
> > Hi Bo,
> >
> > On Tue, Jun 9, 2026 at 1:02 AM Bo Gan <ganboing@gmail.com> wrote:
> >>
> >> Re-visit the vector misaligned load/store handlers and fix:
> >>
> >> a. Avoid stack overflow by using a small sliding mask[] buffer,
> >>     thus optimizes stack usage *IMPORTANT* (correctness). There's no-
> >>     longer a need to have a pre-defined vlen maximum, and worry about
> >>     whether the stack can hold the mask[] variable.
> >>
> >> b. Maintain the value of vstart when redirecting uptrap. (optmization)
> >>     Avoids doing duplicate work when the instruction gets restarted.
> >>
> >> c. Explicitly set VS dirty in (V)SSTATUS. (correctness), VS in
> >>     VSSTATUS must be set dirty if coming from V=1.
> >>
> >> d. Zero out tinst in uptrap if not guest-page fault (correctness).
> >>
> >> This is a follow up patch to [1] and should be applied on top.
> >> [1] https://lore.kernel.org/opensbi/CAEev2e_Vg1mMP4mOKanFX_EGeUzpczRcWW++vBBuN1CfyM0BJw@mail.gmail.com/T/#t
> >> ---
> >> v2: Fix the wrong PATCH 4/4 generated in v1.
> >
> > Testing on Tenstorrent Blackhole with Sifive X280 cores.
> >
> > After adding some logging like this, I'm still able to break the boot
> > (like I'd reported on your previous patch). Full logs here
> > https://gist.github.com/asrinivasanTT/120646cbb7194e7b3505428ebefbdb30
> >
> > diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
> > index 0f29dcf9..0f73c339 100644
> > --- a/lib/sbi/sbi_trap_v_ldst.c
> > +++ b/lib/sbi/sbi_trap_v_ldst.c
> > @@ -17,6 +17,7 @@
> >   #include <sbi/sbi_trap.h>
> >   #include <sbi/sbi_unpriv.h>
> >   #include <sbi/sbi_vector.h>
> > +#include <sbi/sbi_console.h>
> >
> >   #ifdef OPENSBI_CC_SUPPORT_VECTOR
> >
> > @@ -163,6 +164,8 @@ static inline void
> > sbi_misaligned_v_tinst_fixup(struct sbi_trap_info *uptrap)
> >
> >   int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx)
> >   {
> > +       sbi_printf("%s: insn=0x%lx mepc=0x%lx mtval=0x%lx\n",
> > +                  __func__, insn, tcntx->regs.mepc, tcntx->trap.tval);
> >          struct sbi_trap_regs *regs = &tcntx->regs;
> >          struct sbi_trap_info uptrap;
> >          ulong vl = csr_read(CSR_VL);
> > @@ -276,6 +279,8 @@ done:
> >
> >   int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx)
> >   {
> > +       sbi_printf("%s: insn=0x%lx mepc=0x%lx mtval=0x%lx\n",
> > +               __func__, insn, tcntx->regs.mepc, tcntx->trap.tval);
> >          struct sbi_trap_regs *regs = &tcntx->regs;
> >          struct sbi_trap_info uptrap;
> >          ulong vl = csr_read(CSR_VL);
> >
> > Is this expected? Are the logging prints causing the overflow?
> >
> > If I don't have this logging prints added, I'm able to boot fine into
> > linux. I was able to do this with your last patch, so this patch
> > doesn't change much in that aspect.
> >
>
> As discussed in the IRC DM, it's most likely a Linux race condition bug.
> Adding the prints delays the probe function significantly, and the page
> containing the function, marked as __init, can be concurrently unmapped
> and free'd, thus triggering instruction page fault, cause=0xc. Can you
> confirm that removing __init (as a hack) fixes the problem? Looks like
> you've tried it already and the issue seems to be gone. Can you confirm?
>
> It definitely warrants a proper fix to upstream linux. It can't rely on
> timing to avoid a crash like this.
>
> >>
> >> ---
> >> Bo Gan (4):
> >>    lib: sbi: cosmetic changes to reduce indentation
> >>    lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup
> >>    lib: sbi: Add variable-length unprivilege access functions
> >>    lib: sbi: Rework misaligned vector load/store
> >>
> >>   include/sbi/sbi_trap_ldst.h |   3 -
> >>   include/sbi/sbi_unpriv.h    |  16 +++
> >>   include/sbi/sbi_vector.h    |   6 ++
> >>   lib/sbi/sbi_trap_ldst.c     |  66 +++++++-----
> >>   lib/sbi/sbi_trap_v_ldst.c   | 201 ++++++++++++++++++++++--------------
> >>   lib/sbi/sbi_unpriv.c        |  88 ++++++++++++++--
> >>   6 files changed, 270 insertions(+), 110 deletions(-)
> >>
> >> --
> >> 2.34.1
> >>
>
> Bo
>

-- 
opensbi mailing list
opensbi@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/opensbi

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-06-09 23:59 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-09  6:00 [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Bo Gan
2026-06-09  6:00 ` [PATCH v2 1/4] lib: sbi: cosmetic changes to reduce indentation Bo Gan
2026-06-09  6:00 ` [PATCH v2 2/4] lib: sbi: Rework and split sbi_misaligned(_v)_tinst_fixup Bo Gan
2026-06-09  6:00 ` [PATCH v2 3/4] lib: sbi: Add variable-length unprivilege access functions Bo Gan
2026-06-09  6:00 ` [PATCH v2 4/4] lib: sbi: Rework misaligned vector load/store Bo Gan
2026-06-09 22:02 ` [PATCH v2 0/4] Fixes for vector misaligned load/store handlers Anirudh Srinivasan
2026-06-09 23:54   ` Bo Gan
2026-06-09 23:59     ` Anirudh Srinivasan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox