* [PATCH v3 01/12] accel/tcg: Move {set, clear}_helper_retaddr to cpu_ldst.h
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
@ 2024-07-19 1:06 ` Richard Henderson
2024-07-19 1:06 ` [PATCH v3 02/12] target/arm: Use set/clear_helper_retaddr in helper-a64.c Richard Henderson
` (10 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:06 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, Peter Maydell
Use of these in helpers goes hand-in-hand with tlb_vaddr_to_host
and other probing functions.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/user-retaddr.h | 28 ----------------------------
include/exec/cpu_ldst.h | 34 ++++++++++++++++++++++++++++++++++
accel/tcg/cpu-exec.c | 3 ---
accel/tcg/user-exec.c | 1 -
4 files changed, 34 insertions(+), 32 deletions(-)
delete mode 100644 accel/tcg/user-retaddr.h
diff --git a/accel/tcg/user-retaddr.h b/accel/tcg/user-retaddr.h
deleted file mode 100644
index e0f57e1994..0000000000
--- a/accel/tcg/user-retaddr.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef ACCEL_TCG_USER_RETADDR_H
-#define ACCEL_TCG_USER_RETADDR_H
-
-#include "qemu/atomic.h"
-
-extern __thread uintptr_t helper_retaddr;
-
-static inline void set_helper_retaddr(uintptr_t ra)
-{
- helper_retaddr = ra;
- /*
- * Ensure that this write is visible to the SIGSEGV handler that
- * may be invoked due to a subsequent invalid memory operation.
- */
- signal_barrier();
-}
-
-static inline void clear_helper_retaddr(void)
-{
- /*
- * Ensure that previous memory operations have succeeded before
- * removing the data visible to the signal handler.
- */
- signal_barrier();
- helper_retaddr = 0;
-}
-
-#endif
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 71009f84f5..dac12bd8eb 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -379,4 +379,38 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
MMUAccessType access_type, int mmu_idx);
#endif
+/*
+ * For user-only, helpers that use guest to host address translation
+ * must protect the actual host memory access by recording 'retaddr'
+ * for the signal handler. This is required for a race condition in
+ * which another thread unmaps the page between a probe and the
+ * actual access.
+ */
+#ifdef CONFIG_USER_ONLY
+extern __thread uintptr_t helper_retaddr;
+
+static inline void set_helper_retaddr(uintptr_t ra)
+{
+ helper_retaddr = ra;
+ /*
+ * Ensure that this write is visible to the SIGSEGV handler that
+ * may be invoked due to a subsequent invalid memory operation.
+ */
+ signal_barrier();
+}
+
+static inline void clear_helper_retaddr(void)
+{
+ /*
+ * Ensure that previous memory operations have succeeded before
+ * removing the data visible to the signal handler.
+ */
+ signal_barrier();
+ helper_retaddr = 0;
+}
+#else
+#define set_helper_retaddr(ra) do { } while (0)
+#define clear_helper_retaddr() do { } while (0)
+#endif
+
#endif /* CPU_LDST_H */
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 9010dad073..8163295f34 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -41,9 +41,6 @@
#include "tb-context.h"
#include "internal-common.h"
#include "internal-target.h"
-#if defined(CONFIG_USER_ONLY)
-#include "user-retaddr.h"
-#endif
/* -icount align implementation. */
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 80d24540ed..7ddc47b0ba 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -33,7 +33,6 @@
#include "tcg/tcg-ldst.h"
#include "internal-common.h"
#include "internal-target.h"
-#include "user-retaddr.h"
__thread uintptr_t helper_retaddr;
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v3 02/12] target/arm: Use set/clear_helper_retaddr in helper-a64.c
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
2024-07-19 1:06 ` [PATCH v3 01/12] accel/tcg: Move {set, clear}_helper_retaddr to cpu_ldst.h Richard Henderson
@ 2024-07-19 1:06 ` Richard Henderson
2024-07-19 1:06 ` [PATCH v3 03/12] target/arm: Use set/clear_helper_retaddr in SVE and SME helpers Richard Henderson
` (9 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:06 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, Peter Maydell
Use these in helper_dc_dva and the FEAT_MOPS routines to
avoid a race condition with munmap in another thread.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper-a64.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 0ea8668ab4..c60d2a7ec9 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -928,6 +928,8 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
+ uintptr_t ra = GETPC();
+
/*
* Implement DC ZVA, which zeroes a fixed-length block of memory.
* Note that we do not implement the (architecturally mandated)
@@ -948,8 +950,6 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
#ifndef CONFIG_USER_ONLY
if (unlikely(!mem)) {
- uintptr_t ra = GETPC();
-
/*
* Trap if accessing an invalid page. DC_ZVA requires that we supply
* the original pointer for an invalid page. But watchpoints require
@@ -971,7 +971,9 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
}
#endif
+ set_helper_retaddr(ra);
memset(mem, 0, blocklen);
+ clear_helper_retaddr();
}
void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
@@ -1120,7 +1122,9 @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
}
#endif
/* Easy case: just memset the host memory */
+ set_helper_retaddr(ra);
memset(mem, data, setsize);
+ clear_helper_retaddr();
return setsize;
}
@@ -1163,7 +1167,9 @@ static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
}
#endif
/* Easy case: just memset the host memory */
+ set_helper_retaddr(ra);
memset(mem, data, setsize);
+ clear_helper_retaddr();
mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
return setsize;
}
@@ -1497,7 +1503,9 @@ static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
}
#endif
/* Easy case: just memmove the host memory */
+ set_helper_retaddr(ra);
memmove(wmem, rmem, copysize);
+ clear_helper_retaddr();
return copysize;
}
@@ -1572,7 +1580,9 @@ static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
* Easy case: just memmove the host memory. Note that wmem and
* rmem here point to the *last* byte to copy.
*/
+ set_helper_retaddr(ra);
memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
+ clear_helper_retaddr();
return copysize;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v3 03/12] target/arm: Use set/clear_helper_retaddr in SVE and SME helpers
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
2024-07-19 1:06 ` [PATCH v3 01/12] accel/tcg: Move {set, clear}_helper_retaddr to cpu_ldst.h Richard Henderson
2024-07-19 1:06 ` [PATCH v3 02/12] target/arm: Use set/clear_helper_retaddr in helper-a64.c Richard Henderson
@ 2024-07-19 1:06 ` Richard Henderson
2024-07-22 11:56 ` Peter Maydell
2024-07-19 1:06 ` [PATCH v3 04/12] target/ppc/mem_helper.c: Remove a conditional from dcbz_common() Richard Henderson
` (8 subsequent siblings)
11 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:06 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv
Avoid a race condition with munmap in another thread.
Use around blocks that exclusively use "host_fn".
Keep the blocks as small as possible, but without setting
and clearing for every operation on one page.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sme_helper.c | 16 ++++++++++++++
target/arm/tcg/sve_helper.c | 42 +++++++++++++++++++++++++++++--------
2 files changed, 49 insertions(+), 9 deletions(-)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index e2e0575039..ab40ced38f 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -517,6 +517,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
clr_fn(za, 0, reg_off);
}
+ set_helper_retaddr(ra);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -529,6 +531,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -543,6 +547,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(ra);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -554,6 +560,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
reg_off += esize;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -701,6 +709,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(ra);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -711,6 +721,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -725,6 +737,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(ra);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -734,6 +748,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_off += 1 << esz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index dd49e67d7a..f1ee0e060f 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5738,6 +5738,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -5752,6 +5754,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -5771,6 +5775,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -5784,6 +5790,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -5934,15 +5942,11 @@ DO_LDN_2(4, dd, MO_64)
/*
* Load contiguous data, first-fault and no-fault.
*
- * For user-only, one could argue that we should hold the mmap_lock during
- * the operation so that there is no race between page_check_range and the
- * load operation. However, unmapping pages out from under a running thread
- * is extraordinarily unlikely. This theoretical race condition also affects
- * linux-user/ in its get_user/put_user macros.
- *
- * TODO: Construct some helpers, written in assembly, that interact with
- * host_signal_handler to produce memory ops which can properly report errors
- * without racing.
+ * For user-only, we control the race between page_check_range and
+ * another thread's munmap by using set/clear_helper_retaddr. Any
+ * SEGV that occurs between those markers is assumed to be because
+ * the guest page vanished. Keep that block as small as possible
+ * so that unrelated QEMU bugs are not blamed on the guest.
*/
/* Fault on byte I. All bits in FFR from I are cleared. The vector
@@ -6093,6 +6097,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3));
do {
@@ -6101,9 +6107,11 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
(cpu_watchpoint_address_matches
(env_cpu(env), addr + mem_off, 1 << msz)
& BP_MEM_READ)) {
+ clear_helper_retaddr();
goto do_fault;
}
if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) {
+ clear_helper_retaddr();
goto do_fault;
}
host_fn(vd, reg_off, host + mem_off);
@@ -6113,6 +6121,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
} while (reg_off <= reg_last);
+ clear_helper_retaddr();
+
/*
* MemSingleNF is allowed to fail for any reason. We have special
* code above to handle the first element crossing a page boundary.
@@ -6348,6 +6358,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -6362,6 +6374,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -6381,6 +6395,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -6394,6 +6410,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -6560,7 +6578,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
if (unlikely(info.flags & TLB_MMIO)) {
tlb_fn(env, &scratch, reg_off, addr, retaddr);
} else {
+ set_helper_retaddr(retaddr);
host_fn(&scratch, reg_off, info.host);
+ clear_helper_retaddr();
}
} else {
/* Element crosses the page boundary. */
@@ -6782,7 +6802,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
goto fault;
}
+ set_helper_retaddr(retaddr);
host_fn(vd, reg_off, info.host);
+ clear_helper_retaddr();
}
reg_off += esize;
} while (reg_off & 63);
@@ -6986,7 +7008,9 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
do {
void *h = host[i];
if (likely(h != NULL)) {
+ set_helper_retaddr(retaddr);
host_fn(vd, reg_off, h);
+ clear_helper_retaddr();
} else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
target_ulong addr = base + (off_fn(vm, reg_off) << scale);
tlb_fn(env, vd, reg_off, addr, retaddr);
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v3 04/12] target/ppc/mem_helper.c: Remove a conditional from dcbz_common()
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (2 preceding siblings ...)
2024-07-19 1:06 ` [PATCH v3 03/12] target/arm: Use set/clear_helper_retaddr in SVE and SME helpers Richard Henderson
@ 2024-07-19 1:06 ` Richard Henderson
2024-07-19 1:07 ` [PATCH v3 05/12] target/ppc: Hoist dcbz_size out of dcbz_common Richard Henderson
` (7 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:06 UTC (permalink / raw)
To: qemu-devel
Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, BALATON Zoltan,
Nicholas Piggin
From: BALATON Zoltan <balaton@eik.bme.hu>
Instead of passing a bool and select a value within dcbz_common() let
the callers pass in the right value to avoid this conditional
statement. On PPC dcbz is often used to zero memory and some code uses
it a lot. This change improves the run time of a test case that copies
memory with a dcbz call in every iteration from 6.23 to 5.83 seconds.
Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Message-Id: <20240622204833.5F7C74E6000@zero.eik.bme.hu>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
target/ppc/mem_helper.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index f88155ad45..361fd72226 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -271,12 +271,11 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
}
static void dcbz_common(CPUPPCState *env, target_ulong addr,
- uint32_t opcode, bool epid, uintptr_t retaddr)
+ uint32_t opcode, int mmu_idx, uintptr_t retaddr)
{
target_ulong mask, dcbz_size = env->dcache_line_size;
uint32_t i;
void *haddr;
- int mmu_idx = epid ? PPC_TLB_EPID_STORE : ppc_env_mmu_index(env, false);
#if defined(TARGET_PPC64)
/* Check for dcbz vs dcbzl on 970 */
@@ -309,12 +308,12 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode)
{
- dcbz_common(env, addr, opcode, false, GETPC());
+ dcbz_common(env, addr, opcode, ppc_env_mmu_index(env, false), GETPC());
}
void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode)
{
- dcbz_common(env, addr, opcode, true, GETPC());
+ dcbz_common(env, addr, opcode, PPC_TLB_EPID_STORE, GETPC());
}
void helper_icbi(CPUPPCState *env, target_ulong addr)
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v3 05/12] target/ppc: Hoist dcbz_size out of dcbz_common
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (3 preceding siblings ...)
2024-07-19 1:06 ` [PATCH v3 04/12] target/ppc/mem_helper.c: Remove a conditional from dcbz_common() Richard Henderson
@ 2024-07-19 1:07 ` Richard Henderson
2024-07-19 1:07 ` [PATCH v3 06/12] target/ppc: Split out helper_dbczl for 970 Richard Henderson
` (6 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:07 UTC (permalink / raw)
To: qemu-devel
Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, Nicholas Piggin,
BALATON Zoltan
The 970 logic does not apply to dcbzep, which is an e500 insn.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/mem_helper.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 361fd72226..5067919ff8 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -271,22 +271,12 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
}
static void dcbz_common(CPUPPCState *env, target_ulong addr,
- uint32_t opcode, int mmu_idx, uintptr_t retaddr)
+ int dcbz_size, int mmu_idx, uintptr_t retaddr)
{
- target_ulong mask, dcbz_size = env->dcache_line_size;
- uint32_t i;
+ target_ulong mask = ~(target_ulong)(dcbz_size - 1);
void *haddr;
-#if defined(TARGET_PPC64)
- /* Check for dcbz vs dcbzl on 970 */
- if (env->excp_model == POWERPC_EXCP_970 &&
- !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
- dcbz_size = 32;
- }
-#endif
-
/* Align address */
- mask = ~(dcbz_size - 1);
addr &= mask;
/* Check reservation */
@@ -300,7 +290,7 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
memset(haddr, 0, dcbz_size);
} else {
/* Slow path */
- for (i = 0; i < dcbz_size; i += 8) {
+ for (int i = 0; i < dcbz_size; i += 8) {
cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr);
}
}
@@ -308,12 +298,22 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode)
{
- dcbz_common(env, addr, opcode, ppc_env_mmu_index(env, false), GETPC());
+ int dcbz_size = env->dcache_line_size;
+
+#if defined(TARGET_PPC64)
+ /* Check for dcbz vs dcbzl on 970 */
+ if (env->excp_model == POWERPC_EXCP_970 &&
+ !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
+ dcbz_size = 32;
+ }
+#endif
+
+ dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC());
}
void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode)
{
- dcbz_common(env, addr, opcode, PPC_TLB_EPID_STORE, GETPC());
+ dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC());
}
void helper_icbi(CPUPPCState *env, target_ulong addr)
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v3 06/12] target/ppc: Split out helper_dbczl for 970
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (4 preceding siblings ...)
2024-07-19 1:07 ` [PATCH v3 05/12] target/ppc: Hoist dcbz_size out of dcbz_common Richard Henderson
@ 2024-07-19 1:07 ` Richard Henderson
2024-10-20 21:20 ` Guenter Roeck
2024-07-19 1:07 ` [PATCH v3 07/12] target/ppc: Merge helper_{dcbz,dcbzep} Richard Henderson
` (5 subsequent siblings)
11 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:07 UTC (permalink / raw)
To: qemu-devel
Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, Nicholas Piggin,
BALATON Zoltan
We can determine at translation time whether the insn is or
is not dbczl. We must retain a runtime check against the
HID5 register, but we can move that to a separate function
that never affects other ppc models.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/helper.h | 7 +++++--
target/ppc/mem_helper.c | 34 +++++++++++++++++++++-------------
target/ppc/translate.c | 24 ++++++++++++++----------
3 files changed, 40 insertions(+), 25 deletions(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 76b8f25c77..afc56855ff 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -46,8 +46,11 @@ DEF_HELPER_FLAGS_3(stmw, TCG_CALL_NO_WG, void, env, tl, i32)
DEF_HELPER_4(lsw, void, env, tl, i32, i32)
DEF_HELPER_5(lswx, void, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_4(stsw, TCG_CALL_NO_WG, void, env, tl, i32, i32)
-DEF_HELPER_FLAGS_3(dcbz, TCG_CALL_NO_WG, void, env, tl, i32)
-DEF_HELPER_FLAGS_3(dcbzep, TCG_CALL_NO_WG, void, env, tl, i32)
+DEF_HELPER_FLAGS_2(dcbz, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_2(dcbzep, TCG_CALL_NO_WG, void, env, tl)
+#ifdef TARGET_PPC64
+DEF_HELPER_FLAGS_2(dcbzl, TCG_CALL_NO_WG, void, env, tl)
+#endif
DEF_HELPER_FLAGS_2(icbi, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 5067919ff8..d4957efd6e 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -296,26 +296,34 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
}
}
-void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode)
+void helper_dcbz(CPUPPCState *env, target_ulong addr)
{
- int dcbz_size = env->dcache_line_size;
-
-#if defined(TARGET_PPC64)
- /* Check for dcbz vs dcbzl on 970 */
- if (env->excp_model == POWERPC_EXCP_970 &&
- !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
- dcbz_size = 32;
- }
-#endif
-
- dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC());
+ dcbz_common(env, addr, env->dcache_line_size,
+ ppc_env_mmu_index(env, false), GETPC());
}
-void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode)
+void helper_dcbzep(CPUPPCState *env, target_ulong addr)
{
dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC());
}
+#ifdef TARGET_PPC64
+void helper_dcbzl(CPUPPCState *env, target_ulong addr)
+{
+ int dcbz_size = env->dcache_line_size;
+
+ /*
+ * The translator checked for POWERPC_EXCP_970.
+ * All that's left is to check HID5.
+ */
+ if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
+ dcbz_size = 32;
+ }
+
+ dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC());
+}
+#endif
+
void helper_icbi(CPUPPCState *env, target_ulong addr)
{
addr &= ~(env->dcache_line_size - 1);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 0bc16d7251..9e472ab7ef 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -178,6 +178,7 @@ struct DisasContext {
/* Translation flags */
MemOp default_tcg_memop_mask;
#if defined(TARGET_PPC64)
+ powerpc_excp_t excp_model;
bool sf_mode;
bool has_cfar;
bool has_bhrb;
@@ -4445,27 +4446,29 @@ static void gen_dcblc(DisasContext *ctx)
/* dcbz */
static void gen_dcbz(DisasContext *ctx)
{
- TCGv tcgv_addr;
- TCGv_i32 tcgv_op;
+ TCGv tcgv_addr = tcg_temp_new();
gen_set_access_type(ctx, ACCESS_CACHE);
- tcgv_addr = tcg_temp_new();
- tcgv_op = tcg_constant_i32(ctx->opcode & 0x03FF000);
gen_addr_reg_index(ctx, tcgv_addr);
- gen_helper_dcbz(tcg_env, tcgv_addr, tcgv_op);
+
+#ifdef TARGET_PPC64
+ if (ctx->excp_model == POWERPC_EXCP_970 && !(ctx->opcode & 0x00200000)) {
+ gen_helper_dcbzl(tcg_env, tcgv_addr);
+ return;
+ }
+#endif
+
+ gen_helper_dcbz(tcg_env, tcgv_addr);
}
/* dcbzep */
static void gen_dcbzep(DisasContext *ctx)
{
- TCGv tcgv_addr;
- TCGv_i32 tcgv_op;
+ TCGv tcgv_addr = tcg_temp_new();
gen_set_access_type(ctx, ACCESS_CACHE);
- tcgv_addr = tcg_temp_new();
- tcgv_op = tcg_constant_i32(ctx->opcode & 0x03FF000);
gen_addr_reg_index(ctx, tcgv_addr);
- gen_helper_dcbzep(tcg_env, tcgv_addr, tcgv_op);
+ gen_helper_dcbzep(tcg_env, tcgv_addr);
}
/* dst / dstt */
@@ -6486,6 +6489,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->default_tcg_memop_mask = ctx->le_mode ? MO_LE : MO_BE;
ctx->flags = env->flags;
#if defined(TARGET_PPC64)
+ ctx->excp_model = env->excp_model;
ctx->sf_mode = (hflags >> HFLAGS_64) & 1;
ctx->has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
ctx->has_bhrb = !!(env->flags & POWERPC_FLAG_BHRB);
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v3 06/12] target/ppc: Split out helper_dbczl for 970
2024-07-19 1:07 ` [PATCH v3 06/12] target/ppc: Split out helper_dbczl for 970 Richard Henderson
@ 2024-10-20 21:20 ` Guenter Roeck
2024-10-21 10:40 ` BALATON Zoltan
0 siblings, 1 reply; 20+ messages in thread
From: Guenter Roeck @ 2024-10-20 21:20 UTC (permalink / raw)
To: Richard Henderson
Cc: qemu-devel, qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv,
Nicholas Piggin, BALATON Zoltan
Hi,
On Fri, Jul 19, 2024 at 11:07:01AM +1000, Richard Henderson wrote:
> We can determine at translation time whether the insn is or
> is not dbczl. We must retain a runtime check against the
> HID5 register, but we can move that to a separate function
> that never affects other ppc models.
>
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> Reviewed-by: BALATON Zoltan <balaton@eik.bme.hu>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
I see an odd failure when trying to boot ppc64 images in qemu v9.1.0 and
v9.1.1.
Starting network: udhcpc: started, v1.36.1
malloc(): corrupted top size
Aborted
/usr/share/udhcpc/default.script: line 41: can't create : nonexistent directory
I bisected the problem to this patch; bisect log is attached.
I also found that the problem has been fixed in mainline qemu. Bisect
points to commit: f168808d7d10 ("accel/tcg: Add TCGCPUOps.tlb_fill_align")
as the fix. I attached this bisect log as well.
Reverting this patch isn't easy due to several follow-up commits. Applying
commit f168808d7d10 plus several preceding commits on top of v9.1.1 seems
to fix the problem. The commits I applied are
da335fe12a5d include/exec/memop: Move get_alignment_bits from tcg.h
c5809eee452b include/exec/memop: Rename get_alignment_bits
e5b063e81fd2 include/exec/memop: Introduce memop_atomicity_bits
f168808d7d10 accel/tcg: Add TCGCPUOps.tlb_fill_align
Obviously I have no idea if this is even remotely correct, so please take
this report as purely informational in case someone else observes a similar
problem.
Thanks,
Guenter
---
Bug introduced:
# bad: [fd1952d814da738ed107e05583b3e02ac11e88ff] Update version for v9.1.0 release
# good: [c25df57ae8f9fe1c72eee2dab37d76d904ac382e] Update version for 9.0.0 release
git bisect start 'v9.1.0' 'v9.0.0'
# good: [2529ea2d561ea9fe359fb19ebdcfeb8b6cddd219] hw/acpi/ich9: Remove dead code related to 'acpi_memory_hotplug'
git bisect good 2529ea2d561ea9fe359fb19ebdcfeb8b6cddd219
# good: [76e375fc3c538bd6e4232314f693b56536a50b73] docs/qapidoc: add QMP highlighting to annotated qmp-example blocks
git bisect good 76e375fc3c538bd6e4232314f693b56536a50b73
# bad: [60d30cff8472c0bf05a40b0f55221fb4efb768e2] target/ppc: Move SPR indirect registers into PnvCore
git bisect bad 60d30cff8472c0bf05a40b0f55221fb4efb768e2
# bad: [6c635326425091e164b563a7ce96408ef74ff2ec] vfio/{iommufd,container}: Remove caps::aw_bits
git bisect bad 6c635326425091e164b563a7ce96408ef74ff2ec
# good: [23fa74974d8c96bc95cbecc0d4e2d90f984939f6] Merge tag 'pull-target-arm-20240718' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
git bisect good 23fa74974d8c96bc95cbecc0d4e2d90f984939f6
# good: [c135d5eaafe7aa2533da663d8e5a34a424b71eb9] tests/tcg/aarch64: Fix test-mte.py
git bisect good c135d5eaafe7aa2533da663d8e5a34a424b71eb9
# good: [6af69d02706c821797802cfd56acdac13a7c9422] Merge tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu into staging
git bisect good 6af69d02706c821797802cfd56acdac13a7c9422
# bad: [71bce0e1fb1a866dde4a4b6016fc18b09f317338] Merge tag 'pull-tcg-20240723' of https://gitlab.com/rth7680/qemu into staging
git bisect bad 71bce0e1fb1a866dde4a4b6016fc18b09f317338
# bad: [62fe57c6d23fe8136d281f0e37ec8a9fab08b60a] target/ppc: Split out helper_dbczl for 970
git bisect bad 62fe57c6d23fe8136d281f0e37ec8a9fab08b60a
# good: [3b9991e35c08be7fd6b84090b2114ff1bfd44d3f] target/arm: Use set/clear_helper_retaddr in SVE and SME helpers
git bisect good 3b9991e35c08be7fd6b84090b2114ff1bfd44d3f
# good: [521a80d895ec8ef0200dcac9b9b19e60b0cc1d1a] target/ppc: Hoist dcbz_size out of dcbz_common
git bisect good 521a80d895ec8ef0200dcac9b9b19e60b0cc1d1a
# first bad commit: [62fe57c6d23fe8136d281f0e37ec8a9fab08b60a] target/ppc: Split out helper_dbczl for 970
---
Bug fixed:
# pass: [72b0b80714066a435502b67cdb55a7868ba0487d] Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
# fail: [fd1952d814da738ed107e05583b3e02ac11e88ff] Update version for v9.1.0 release
git bisect start '--term-bad=pass' '--term-good=fail' 'HEAD' 'v9.1.0'
# fail: [4ae7d11b70a840eec7aa27269093b15d04ebc84e] Merge tag 'pull-tcg-20240922' of https://gitlab.com/rth7680/qemu into staging
git bisect fail 4ae7d11b70a840eec7aa27269093b15d04ebc84e
# fail: [b5ab62b3c0050612c7f9b0b4baeb44ebab42775a] Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
git bisect fail b5ab62b3c0050612c7f9b0b4baeb44ebab42775a
# pass: [1bfb726112ea4fda07c988f08df32d1eebb9abec] ui/pixman: generalize shared_image_destroy
git bisect pass 1bfb726112ea4fda07c988f08df32d1eebb9abec
# fail: [7e3b6d8063f245d27eecce5aabe624b5785f2a77] Merge tag 'crypto-fixes-pull-request' of https://gitlab.com/berrange/qemu into staging
git bisect fail 7e3b6d8063f245d27eecce5aabe624b5785f2a77
# pass: [e530581ee06573fcf48c7f7a6c3f8ec6e5809243] target/arm: Fix alignment fault priority in get_phys_addr_lpae
git bisect pass e530581ee06573fcf48c7f7a6c3f8ec6e5809243
# pass: [795592fef7d5d66a67b95a7f45cc1a84883db6a8] accel/tcg: Use the alignment test in tlb_fill_align
git bisect pass 795592fef7d5d66a67b95a7f45cc1a84883db6a8
# fail: [9d08a70ddc08e9b6ecf870fd232531c78fe0b208] tests/tcg: Run test-proc-mappings.py on i386
git bisect fail 9d08a70ddc08e9b6ecf870fd232531c78fe0b208
# fail: [da335fe12a5da71a33d7afc2075a341f26213f53] include/exec/memop: Move get_alignment_bits from tcg.h
git bisect fail da335fe12a5da71a33d7afc2075a341f26213f53
# fail: [e5b063e81fd2b30aad1e9128238871c71b62a666] include/exec/memop: Introduce memop_atomicity_bits
git bisect fail e5b063e81fd2b30aad1e9128238871c71b62a666
# pass: [f168808d7d100ed96c52c4438c4ddb557bd086bf] accel/tcg: Add TCGCPUOps.tlb_fill_align
git bisect pass f168808d7d100ed96c52c4438c4ddb557bd086bf
# first pass commit: [f168808d7d100ed96c52c4438c4ddb557bd086bf] accel/tcg: Add TCGCPUOps.tlb_fill_align
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v3 06/12] target/ppc: Split out helper_dbczl for 970
2024-10-20 21:20 ` Guenter Roeck
@ 2024-10-21 10:40 ` BALATON Zoltan
0 siblings, 0 replies; 20+ messages in thread
From: BALATON Zoltan @ 2024-10-21 10:40 UTC (permalink / raw)
To: Guenter Roeck; +Cc: Richard Henderson, qemu-devel, qemu-ppc, Nicholas Piggin
On Sun, 20 Oct 2024, Guenter Roeck wrote:
> Hi,
>
> On Fri, Jul 19, 2024 at 11:07:01AM +1000, Richard Henderson wrote:
>> We can determine at translation time whether the insn is or
>> is not dbczl. We must retain a runtime check against the
>> HID5 register, but we can move that to a separate function
>> that never affects other ppc models.
>>
>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>> Reviewed-by: BALATON Zoltan <balaton@eik.bme.hu>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>
> I see an odd failure when trying to boot ppc64 images in qemu v9.1.0 and
> v9.1.1.
>
> Starting network: udhcpc: started, v1.36.1
> malloc(): corrupted top size
> Aborted
Do you have more info on what reproduces this? What is the QEMU command
and what guest code is running? This looks like something may be using
dcbzl on something else than a 970 or if this is emulating a 970 then I
don't see why this stopped working. Looking at the reproducer may help.
Regards,
BALATON Zoltan
> /usr/share/udhcpc/default.script: line 41: can't create : nonexistent directory
>
> I bisected the problem to this patch; bisect log is attached.
> I also found that the problem has been fixed in mainline qemu. Bisect
> points to commit: f168808d7d10 ("accel/tcg: Add TCGCPUOps.tlb_fill_align")
> as the fix. I attached this bisect log as well.
>
> Reverting this patch isn't easy due to several follow-up commits. Applying
> commit f168808d7d10 plus several preceding commits on top of v9.1.1 seems
> to fix the problem. The commits I applied are
>
> da335fe12a5d include/exec/memop: Move get_alignment_bits from tcg.h
> c5809eee452b include/exec/memop: Rename get_alignment_bits
> e5b063e81fd2 include/exec/memop: Introduce memop_atomicity_bits
> f168808d7d10 accel/tcg: Add TCGCPUOps.tlb_fill_align
>
> Obviously I have no idea if this is even remotely correct, so please take
> this report as purely informational in case someone else observes a similar
> problem.
>
> Thanks,
> Guenter
>
> ---
> Bug introduced:
>
> # bad: [fd1952d814da738ed107e05583b3e02ac11e88ff] Update version for v9.1.0 release
> # good: [c25df57ae8f9fe1c72eee2dab37d76d904ac382e] Update version for 9.0.0 release
> git bisect start 'v9.1.0' 'v9.0.0'
> # good: [2529ea2d561ea9fe359fb19ebdcfeb8b6cddd219] hw/acpi/ich9: Remove dead code related to 'acpi_memory_hotplug'
> git bisect good 2529ea2d561ea9fe359fb19ebdcfeb8b6cddd219
> # good: [76e375fc3c538bd6e4232314f693b56536a50b73] docs/qapidoc: add QMP highlighting to annotated qmp-example blocks
> git bisect good 76e375fc3c538bd6e4232314f693b56536a50b73
> # bad: [60d30cff8472c0bf05a40b0f55221fb4efb768e2] target/ppc: Move SPR indirect registers into PnvCore
> git bisect bad 60d30cff8472c0bf05a40b0f55221fb4efb768e2
> # bad: [6c635326425091e164b563a7ce96408ef74ff2ec] vfio/{iommufd,container}: Remove caps::aw_bits
> git bisect bad 6c635326425091e164b563a7ce96408ef74ff2ec
> # good: [23fa74974d8c96bc95cbecc0d4e2d90f984939f6] Merge tag 'pull-target-arm-20240718' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
> git bisect good 23fa74974d8c96bc95cbecc0d4e2d90f984939f6
> # good: [c135d5eaafe7aa2533da663d8e5a34a424b71eb9] tests/tcg/aarch64: Fix test-mte.py
> git bisect good c135d5eaafe7aa2533da663d8e5a34a424b71eb9
> # good: [6af69d02706c821797802cfd56acdac13a7c9422] Merge tag 'nvme-next-pull-request' of https://gitlab.com/birkelund/qemu into staging
> git bisect good 6af69d02706c821797802cfd56acdac13a7c9422
> # bad: [71bce0e1fb1a866dde4a4b6016fc18b09f317338] Merge tag 'pull-tcg-20240723' of https://gitlab.com/rth7680/qemu into staging
> git bisect bad 71bce0e1fb1a866dde4a4b6016fc18b09f317338
> # bad: [62fe57c6d23fe8136d281f0e37ec8a9fab08b60a] target/ppc: Split out helper_dbczl for 970
> git bisect bad 62fe57c6d23fe8136d281f0e37ec8a9fab08b60a
> # good: [3b9991e35c08be7fd6b84090b2114ff1bfd44d3f] target/arm: Use set/clear_helper_retaddr in SVE and SME helpers
> git bisect good 3b9991e35c08be7fd6b84090b2114ff1bfd44d3f
> # good: [521a80d895ec8ef0200dcac9b9b19e60b0cc1d1a] target/ppc: Hoist dcbz_size out of dcbz_common
> git bisect good 521a80d895ec8ef0200dcac9b9b19e60b0cc1d1a
> # first bad commit: [62fe57c6d23fe8136d281f0e37ec8a9fab08b60a] target/ppc: Split out helper_dbczl for 970
>
> ---
> Bug fixed:
>
> # pass: [72b0b80714066a435502b67cdb55a7868ba0487d] Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
> # fail: [fd1952d814da738ed107e05583b3e02ac11e88ff] Update version for v9.1.0 release
> git bisect start '--term-bad=pass' '--term-good=fail' 'HEAD' 'v9.1.0'
> # fail: [4ae7d11b70a840eec7aa27269093b15d04ebc84e] Merge tag 'pull-tcg-20240922' of https://gitlab.com/rth7680/qemu into staging
> git bisect fail 4ae7d11b70a840eec7aa27269093b15d04ebc84e
> # fail: [b5ab62b3c0050612c7f9b0b4baeb44ebab42775a] Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
> git bisect fail b5ab62b3c0050612c7f9b0b4baeb44ebab42775a
> # pass: [1bfb726112ea4fda07c988f08df32d1eebb9abec] ui/pixman: generalize shared_image_destroy
> git bisect pass 1bfb726112ea4fda07c988f08df32d1eebb9abec
> # fail: [7e3b6d8063f245d27eecce5aabe624b5785f2a77] Merge tag 'crypto-fixes-pull-request' of https://gitlab.com/berrange/qemu into staging
> git bisect fail 7e3b6d8063f245d27eecce5aabe624b5785f2a77
> # pass: [e530581ee06573fcf48c7f7a6c3f8ec6e5809243] target/arm: Fix alignment fault priority in get_phys_addr_lpae
> git bisect pass e530581ee06573fcf48c7f7a6c3f8ec6e5809243
> # pass: [795592fef7d5d66a67b95a7f45cc1a84883db6a8] accel/tcg: Use the alignment test in tlb_fill_align
> git bisect pass 795592fef7d5d66a67b95a7f45cc1a84883db6a8
> # fail: [9d08a70ddc08e9b6ecf870fd232531c78fe0b208] tests/tcg: Run test-proc-mappings.py on i386
> git bisect fail 9d08a70ddc08e9b6ecf870fd232531c78fe0b208
> # fail: [da335fe12a5da71a33d7afc2075a341f26213f53] include/exec/memop: Move get_alignment_bits from tcg.h
> git bisect fail da335fe12a5da71a33d7afc2075a341f26213f53
> # fail: [e5b063e81fd2b30aad1e9128238871c71b62a666] include/exec/memop: Introduce memop_atomicity_bits
> git bisect fail e5b063e81fd2b30aad1e9128238871c71b62a666
> # pass: [f168808d7d100ed96c52c4438c4ddb557bd086bf] accel/tcg: Add TCGCPUOps.tlb_fill_align
> git bisect pass f168808d7d100ed96c52c4438c4ddb557bd086bf
> # first pass commit: [f168808d7d100ed96c52c4438c4ddb557bd086bf] accel/tcg: Add TCGCPUOps.tlb_fill_align
>
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v3 07/12] target/ppc: Merge helper_{dcbz,dcbzep}
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (5 preceding siblings ...)
2024-07-19 1:07 ` [PATCH v3 06/12] target/ppc: Split out helper_dbczl for 970 Richard Henderson
@ 2024-07-19 1:07 ` Richard Henderson
2024-07-19 1:07 ` [PATCH v3 08/12] target/ppc: Improve helper_dcbz for user-only Richard Henderson
` (4 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:07 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, Nicholas Piggin
Merge the two and pass the mmu_idx directly from translation.
Swap the argument order in dcbz_common to avoid extra swaps.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/helper.h | 3 +--
target/ppc/mem_helper.c | 14 ++++----------
target/ppc/translate.c | 4 ++--
3 files changed, 7 insertions(+), 14 deletions(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index afc56855ff..4fa089cbf9 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -46,8 +46,7 @@ DEF_HELPER_FLAGS_3(stmw, TCG_CALL_NO_WG, void, env, tl, i32)
DEF_HELPER_4(lsw, void, env, tl, i32, i32)
DEF_HELPER_5(lswx, void, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_4(stsw, TCG_CALL_NO_WG, void, env, tl, i32, i32)
-DEF_HELPER_FLAGS_2(dcbz, TCG_CALL_NO_WG, void, env, tl)
-DEF_HELPER_FLAGS_2(dcbzep, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_3(dcbz, TCG_CALL_NO_WG, void, env, tl, int)
#ifdef TARGET_PPC64
DEF_HELPER_FLAGS_2(dcbzl, TCG_CALL_NO_WG, void, env, tl)
#endif
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index d4957efd6e..24bae3b80c 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -271,7 +271,7 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
}
static void dcbz_common(CPUPPCState *env, target_ulong addr,
- int dcbz_size, int mmu_idx, uintptr_t retaddr)
+ int mmu_idx, int dcbz_size, uintptr_t retaddr)
{
target_ulong mask = ~(target_ulong)(dcbz_size - 1);
void *haddr;
@@ -296,15 +296,9 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
}
}
-void helper_dcbz(CPUPPCState *env, target_ulong addr)
+void helper_dcbz(CPUPPCState *env, target_ulong addr, int mmu_idx)
{
- dcbz_common(env, addr, env->dcache_line_size,
- ppc_env_mmu_index(env, false), GETPC());
-}
-
-void helper_dcbzep(CPUPPCState *env, target_ulong addr)
-{
- dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC());
+ dcbz_common(env, addr, mmu_idx, env->dcache_line_size, GETPC());
}
#ifdef TARGET_PPC64
@@ -320,7 +314,7 @@ void helper_dcbzl(CPUPPCState *env, target_ulong addr)
dcbz_size = 32;
}
- dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC());
+ dcbz_common(env, addr, ppc_env_mmu_index(env, false), dcbz_size, GETPC());
}
#endif
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 9e472ab7ef..cba943a49d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -4458,7 +4458,7 @@ static void gen_dcbz(DisasContext *ctx)
}
#endif
- gen_helper_dcbz(tcg_env, tcgv_addr);
+ gen_helper_dcbz(tcg_env, tcgv_addr, tcg_constant_i32(ctx->mem_idx));
}
/* dcbzep */
@@ -4468,7 +4468,7 @@ static void gen_dcbzep(DisasContext *ctx)
gen_set_access_type(ctx, ACCESS_CACHE);
gen_addr_reg_index(ctx, tcgv_addr);
- gen_helper_dcbzep(tcg_env, tcgv_addr);
+ gen_helper_dcbz(tcg_env, tcgv_addr, tcg_constant_i32(PPC_TLB_EPID_STORE));
}
/* dst / dstt */
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v3 08/12] target/ppc: Improve helper_dcbz for user-only
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (6 preceding siblings ...)
2024-07-19 1:07 ` [PATCH v3 07/12] target/ppc: Merge helper_{dcbz,dcbzep} Richard Henderson
@ 2024-07-19 1:07 ` Richard Henderson
2024-07-22 12:08 ` Peter Maydell
2024-07-19 1:07 ` [PATCH v3 09/12] target/s390x: Use user_or_likely in do_access_memset Richard Henderson
` (3 subsequent siblings)
11 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:07 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv
Mark the reserve_addr check unlikely. Use tlb_vaddr_to_host
instead of probe_write, relying on the memset itself to test
for page writability. Use set/clear_helper_retaddr so that
we can properly unwind on segfault.
With this, a trivial loop around guest memset will spend
nearly 50% of runtime within helper_dcbz and host memset.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/mem_helper.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 24bae3b80c..953dd08d5d 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -280,20 +280,27 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
addr &= mask;
/* Check reservation */
- if ((env->reserve_addr & mask) == addr) {
+ if (unlikely((env->reserve_addr & mask) == addr)) {
env->reserve_addr = (target_ulong)-1ULL;
}
/* Try fast path translate */
+#ifdef CONFIG_USER_ONLY
+ haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx);
+#else
haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
- if (haddr) {
- memset(haddr, 0, dcbz_size);
- } else {
+ if (unlikely(!haddr)) {
/* Slow path */
for (int i = 0; i < dcbz_size; i += 8) {
cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr);
}
+ return;
}
+#endif
+
+ set_helper_retaddr(retaddr);
+ memset(haddr, 0, dcbz_size);
+ clear_helper_retaddr();
}
void helper_dcbz(CPUPPCState *env, target_ulong addr, int mmu_idx)
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v3 08/12] target/ppc: Improve helper_dcbz for user-only
2024-07-19 1:07 ` [PATCH v3 08/12] target/ppc: Improve helper_dcbz for user-only Richard Henderson
@ 2024-07-22 12:08 ` Peter Maydell
0 siblings, 0 replies; 20+ messages in thread
From: Peter Maydell @ 2024-07-22 12:08 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv
On Fri, 19 Jul 2024 at 02:08, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Mark the reserve_addr check unlikely. Use tlb_vaddr_to_host
> instead of probe_write, relying on the memset itself to test
> for page writability. Use set/clear_helper_retaddr so that
> we can properly unwind on segfault.
>
> With this, a trivial loop around guest memset will spend
> nearly 50% of runtime within helper_dcbz and host memset.
I find this a bit difficult to interpret -- maybe add
what it was before (presumably spending too much time
somewhere else) ?
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
thanks
-- PMM
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v3 09/12] target/s390x: Use user_or_likely in do_access_memset
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (7 preceding siblings ...)
2024-07-19 1:07 ` [PATCH v3 08/12] target/ppc: Improve helper_dcbz for user-only Richard Henderson
@ 2024-07-19 1:07 ` Richard Henderson
2024-07-19 1:07 ` [PATCH v3 10/12] target/s390x: Use user_or_likely in access_memmove Richard Henderson
` (2 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:07 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, Peter Maydell
Eliminate the ifdef by using a predicate that is
always true with CONFIG_USER_ONLY.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/tcg/mem_helper.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 6cdbc34178..5311a15a09 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -225,10 +225,7 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
uint8_t byte, uint16_t size, int mmu_idx,
uintptr_t ra)
{
-#ifdef CONFIG_USER_ONLY
- memset(haddr, byte, size);
-#else
- if (likely(haddr)) {
+ if (user_or_likely(haddr)) {
memset(haddr, byte, size);
} else {
MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
@@ -236,7 +233,6 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
}
}
-#endif
}
static void access_memset(CPUS390XState *env, S390Access *desta,
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v3 10/12] target/s390x: Use user_or_likely in access_memmove
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (8 preceding siblings ...)
2024-07-19 1:07 ` [PATCH v3 09/12] target/s390x: Use user_or_likely in do_access_memset Richard Henderson
@ 2024-07-19 1:07 ` Richard Henderson
2024-07-22 12:10 ` Peter Maydell
2024-07-19 1:07 ` [PATCH v3 11/12] target/s390x: Use set/clear_helper_retaddr in mem_helper.c Richard Henderson
2024-07-19 1:07 ` [PATCH v3 12/12] target/riscv: Simplify probing in vext_ldff Richard Henderson
11 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:07 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv
Invert the conditional, indent the block, and use the macro
that expands to true for user-only.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/tcg/mem_helper.c | 54 +++++++++++++++++------------------
1 file changed, 26 insertions(+), 28 deletions(-)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 5311a15a09..331a35b2e5 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -296,41 +296,39 @@ static void access_memmove(CPUS390XState *env, S390Access *desta,
S390Access *srca, uintptr_t ra)
{
int len = desta->size1 + desta->size2;
- int diff;
assert(len == srca->size1 + srca->size2);
/* Fallback to slow access in case we don't have access to all host pages */
- if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
- !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
- int i;
+ if (user_or_likely(desta->haddr1 &&
+ srca->haddr1 &&
+ (!desta->size2 || desta->haddr2) &&
+ (!srca->size2 || srca->haddr2))) {
+ int diff = desta->size1 - srca->size1;
- for (i = 0; i < len; i++) {
- uint8_t byte = access_get_byte(env, srca, i, ra);
-
- access_set_byte(env, desta, i, byte, ra);
- }
- return;
- }
-
- diff = desta->size1 - srca->size1;
- if (likely(diff == 0)) {
- memmove(desta->haddr1, srca->haddr1, srca->size1);
- if (unlikely(srca->size2)) {
- memmove(desta->haddr2, srca->haddr2, srca->size2);
- }
- } else if (diff > 0) {
- memmove(desta->haddr1, srca->haddr1, srca->size1);
- memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
- if (likely(desta->size2)) {
- memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
+ if (likely(diff == 0)) {
+ memmove(desta->haddr1, srca->haddr1, srca->size1);
+ if (unlikely(srca->size2)) {
+ memmove(desta->haddr2, srca->haddr2, srca->size2);
+ }
+ } else if (diff > 0) {
+ memmove(desta->haddr1, srca->haddr1, srca->size1);
+ memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
+ if (likely(desta->size2)) {
+ memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
+ }
+ } else {
+ diff = -diff;
+ memmove(desta->haddr1, srca->haddr1, desta->size1);
+ memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
+ if (likely(srca->size2)) {
+ memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
+ }
}
} else {
- diff = -diff;
- memmove(desta->haddr1, srca->haddr1, desta->size1);
- memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
- if (likely(srca->size2)) {
- memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
+ for (int i = 0; i < len; i++) {
+ uint8_t byte = access_get_byte(env, srca, i, ra);
+ access_set_byte(env, desta, i, byte, ra);
}
}
}
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v3 10/12] target/s390x: Use user_or_likely in access_memmove
2024-07-19 1:07 ` [PATCH v3 10/12] target/s390x: Use user_or_likely in access_memmove Richard Henderson
@ 2024-07-22 12:10 ` Peter Maydell
0 siblings, 0 replies; 20+ messages in thread
From: Peter Maydell @ 2024-07-22 12:10 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, qemu-arm, qemu-s390x, qemu-riscv
On Fri, 19 Jul 2024 at 02:08, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Invert the conditional, indent the block, and use the macro
> that expands to true for user-only.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/s390x/tcg/mem_helper.c | 54 +++++++++++++++++------------------
> 1 file changed, 26 insertions(+), 28 deletions(-)
>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
thanks
-- PMM
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v3 11/12] target/s390x: Use set/clear_helper_retaddr in mem_helper.c
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (9 preceding siblings ...)
2024-07-19 1:07 ` [PATCH v3 10/12] target/s390x: Use user_or_likely in access_memmove Richard Henderson
@ 2024-07-19 1:07 ` Richard Henderson
2024-07-22 12:15 ` Peter Maydell
2024-07-19 1:07 ` [PATCH v3 12/12] target/riscv: Simplify probing in vext_ldff Richard Henderson
11 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:07 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv
Avoid a race condition with munmap in another thread.
For access_memset and access_memmove, manage the value
within the helper. For uses of access_{get,set}_byte,
manage the value across the for loops.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/tcg/mem_helper.c | 43 ++++++++++++++++++++++++++++++-----
1 file changed, 37 insertions(+), 6 deletions(-)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 331a35b2e5..0e12dae2aa 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -238,14 +238,14 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
static void access_memset(CPUS390XState *env, S390Access *desta,
uint8_t byte, uintptr_t ra)
{
-
+ set_helper_retaddr(ra);
do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
desta->mmu_idx, ra);
- if (likely(!desta->size2)) {
- return;
+ if (unlikely(desta->size2)) {
+ do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
+ desta->size2, desta->mmu_idx, ra);
}
- do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
- desta->mmu_idx, ra);
+ clear_helper_retaddr();
}
static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
@@ -366,6 +366,8 @@ static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+ set_helper_retaddr(ra);
+
for (i = 0; i < l; i++) {
const uint8_t x = access_get_byte(env, &srca1, i, ra) &
access_get_byte(env, &srca2, i, ra);
@@ -373,6 +375,8 @@ static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
c |= x;
access_set_byte(env, &desta, i, x, ra);
}
+
+ clear_helper_retaddr();
return c != 0;
}
@@ -407,6 +411,7 @@ static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
return 0;
}
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
access_get_byte(env, &srca2, i, ra);
@@ -414,6 +419,7 @@ static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
c |= x;
access_set_byte(env, &desta, i, x, ra);
}
+ clear_helper_retaddr();
return c != 0;
}
@@ -441,6 +447,8 @@ static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+ set_helper_retaddr(ra);
+
for (i = 0; i < l; i++) {
const uint8_t x = access_get_byte(env, &srca1, i, ra) |
access_get_byte(env, &srca2, i, ra);
@@ -448,6 +456,8 @@ static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
c |= x;
access_set_byte(env, &desta, i, x, ra);
}
+
+ clear_helper_retaddr();
return c != 0;
}
@@ -484,11 +494,13 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
} else if (!is_destructive_overlap(env, dest, src, l)) {
access_memmove(env, &desta, &srca, ra);
} else {
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
uint8_t byte = access_get_byte(env, &srca, i, ra);
access_set_byte(env, &desta, i, byte, ra);
}
+ clear_helper_retaddr();
}
return env->cc_op;
@@ -514,10 +526,12 @@ void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+ set_helper_retaddr(ra);
for (i = l - 1; i >= 0; i--) {
uint8_t byte = access_get_byte(env, &srca, i, ra);
access_set_byte(env, &desta, i, byte, ra);
}
+ clear_helper_retaddr();
}
/* move inverse */
@@ -534,11 +548,13 @@ void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
src = wrap_address(env, src - l + 1);
access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
-
access_set_byte(env, &desta, i, x, ra);
}
+ clear_helper_retaddr();
}
/* move numerics */
@@ -555,12 +571,15 @@ void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
(access_get_byte(env, &srca2, i, ra) & 0xf0);
access_set_byte(env, &desta, i, x, ra);
}
+ clear_helper_retaddr();
}
/* move with offset */
@@ -580,6 +599,8 @@ void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
/* Handle rightmost byte */
byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
+
+ set_helper_retaddr(ra);
byte_src = access_get_byte(env, &srca, len_src - 1, ra);
byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
@@ -595,6 +616,7 @@ void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
byte_dest |= byte_src << 4;
access_set_byte(env, &desta, i, byte_dest, ra);
}
+ clear_helper_retaddr();
}
/* move zones */
@@ -611,12 +633,15 @@ void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
(access_get_byte(env, &srca2, i, ra) & 0x0f);
access_set_byte(env, &desta, i, x, ra);
}
+ clear_helper_retaddr();
}
/* compare unsigned byte arrays */
@@ -961,15 +986,19 @@ uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
*/
access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
+
+ set_helper_retaddr(ra);
for (i = 0; i < len; i++) {
const uint8_t v = access_get_byte(env, &srca, i, ra);
access_set_byte(env, &desta, i, v, ra);
if (v == c) {
+ clear_helper_retaddr();
set_address_zero(env, r1, d + i);
return 1;
}
}
+ clear_helper_retaddr();
set_address_zero(env, r1, d + len);
set_address_zero(env, r2, s + len);
return 3;
@@ -1060,6 +1089,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
*dest = wrap_address(env, *dest + len);
} else {
access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
+ set_helper_retaddr(ra);
/* The remaining length selects the padding byte. */
for (i = 0; i < len; (*destlen)--, i++) {
@@ -1069,6 +1099,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
access_set_byte(env, &desta, i, pad >> 8, ra);
}
}
+ clear_helper_retaddr();
*dest = wrap_address(env, *dest + len);
}
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v3 11/12] target/s390x: Use set/clear_helper_retaddr in mem_helper.c
2024-07-19 1:07 ` [PATCH v3 11/12] target/s390x: Use set/clear_helper_retaddr in mem_helper.c Richard Henderson
@ 2024-07-22 12:15 ` Peter Maydell
0 siblings, 0 replies; 20+ messages in thread
From: Peter Maydell @ 2024-07-22 12:15 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, qemu-arm, qemu-s390x, qemu-riscv
On Fri, 19 Jul 2024 at 02:09, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Avoid a race condition with munmap in another thread.
> For access_memset and access_memmove, manage the value
> within the helper. For uses of access_{get,set}_byte,
> manage the value across the for loops.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/s390x/tcg/mem_helper.c | 43 ++++++++++++++++++++++++++++++-----
> 1 file changed, 37 insertions(+), 6 deletions(-)
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
thanks
-- PMM
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v3 12/12] target/riscv: Simplify probing in vext_ldff
2024-07-19 1:06 [PATCH v3 00/12] Fixes for user-only munmap races Richard Henderson
` (10 preceding siblings ...)
2024-07-19 1:07 ` [PATCH v3 11/12] target/s390x: Use set/clear_helper_retaddr in mem_helper.c Richard Henderson
@ 2024-07-19 1:07 ` Richard Henderson
2024-07-22 16:49 ` Max Chou
11 siblings, 1 reply; 20+ messages in thread
From: Richard Henderson @ 2024-07-19 1:07 UTC (permalink / raw)
To: qemu-devel; +Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, Alistair Francis
The current pairing of tlb_vaddr_to_host with extra is either
inefficient (user-only, with page_check_range) or incorrect
(system, with probe_pages).
For proper non-fault behaviour, use probe_access_flags with
its nonfault parameter set to true.
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/riscv/vector_helper.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 1b4d5a8e37..10a52ceb5b 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -474,7 +474,6 @@ vext_ldff(void *vd, void *v0, target_ulong base,
vext_ldst_elem_fn *ldst_elem,
uint32_t log2_esz, uintptr_t ra)
{
- void *host;
uint32_t i, k, vl = 0;
uint32_t nf = vext_nf(desc);
uint32_t vm = vext_vm(desc);
@@ -493,27 +492,31 @@ vext_ldff(void *vd, void *v0, target_ulong base,
}
addr = adjust_addr(env, base + i * (nf << log2_esz));
if (i == 0) {
+ /* Allow fault on first element. */
probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
} else {
- /* if it triggers an exception, no need to check watchpoint */
remain = nf << log2_esz;
while (remain > 0) {
+ void *host;
+ int flags;
+
offset = -(addr | TARGET_PAGE_MASK);
- host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
- if (host) {
-#ifdef CONFIG_USER_ONLY
- if (!page_check_range(addr, offset, PAGE_READ)) {
- vl = i;
- goto ProbeSuccess;
- }
-#else
- probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
-#endif
- } else {
+
+ /* Probe nonfault on subsequent elements. */
+ flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
+ mmu_index, true, &host, 0);
+
+ /*
+ * Stop if invalid (unmapped) or mmio (transaction may fail).
+ * Do not stop if watchpoint, as the spec says that
+ * first-fault should continue to access the same
+ * elements regardless of any watchpoint.
+ */
+ if (flags & ~TLB_WATCHPOINT) {
vl = i;
goto ProbeSuccess;
}
- if (remain <= offset) {
+ if (remain <= offset) {
break;
}
remain -= offset;
--
2.43.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v3 12/12] target/riscv: Simplify probing in vext_ldff
2024-07-19 1:07 ` [PATCH v3 12/12] target/riscv: Simplify probing in vext_ldff Richard Henderson
@ 2024-07-22 16:49 ` Max Chou
0 siblings, 0 replies; 20+ messages in thread
From: Max Chou @ 2024-07-22 16:49 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
Cc: qemu-arm, qemu-ppc, qemu-s390x, qemu-riscv, Alistair Francis
Reviewed-by: Max Chou <max.chou@sifive.com>
On 2024/7/19 9:07 AM, Richard Henderson wrote:
> The current pairing of tlb_vaddr_to_host with extra is either
> inefficient (user-only, with page_check_range) or incorrect
> (system, with probe_pages).
>
> For proper non-fault behaviour, use probe_access_flags with
> its nonfault parameter set to true.
>
> Acked-by: Alistair Francis <alistair.francis@wdc.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/riscv/vector_helper.c | 31 +++++++++++++++++--------------
> 1 file changed, 17 insertions(+), 14 deletions(-)
>
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 1b4d5a8e37..10a52ceb5b 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -474,7 +474,6 @@ vext_ldff(void *vd, void *v0, target_ulong base,
> vext_ldst_elem_fn *ldst_elem,
> uint32_t log2_esz, uintptr_t ra)
> {
> - void *host;
> uint32_t i, k, vl = 0;
> uint32_t nf = vext_nf(desc);
> uint32_t vm = vext_vm(desc);
> @@ -493,27 +492,31 @@ vext_ldff(void *vd, void *v0, target_ulong base,
> }
> addr = adjust_addr(env, base + i * (nf << log2_esz));
> if (i == 0) {
> + /* Allow fault on first element. */
> probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
> } else {
> - /* if it triggers an exception, no need to check watchpoint */
> remain = nf << log2_esz;
> while (remain > 0) {
> + void *host;
> + int flags;
> +
> offset = -(addr | TARGET_PAGE_MASK);
> - host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
> - if (host) {
> -#ifdef CONFIG_USER_ONLY
> - if (!page_check_range(addr, offset, PAGE_READ)) {
> - vl = i;
> - goto ProbeSuccess;
> - }
> -#else
> - probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
> -#endif
> - } else {
> +
> + /* Probe nonfault on subsequent elements. */
> + flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
> + mmu_index, true, &host, 0);
> +
> + /*
> + * Stop if invalid (unmapped) or mmio (transaction may fail).
> + * Do not stop if watchpoint, as the spec says that
> + * first-fault should continue to access the same
> + * elements regardless of any watchpoint.
> + */
> + if (flags & ~TLB_WATCHPOINT) {
> vl = i;
> goto ProbeSuccess;
> }
> - if (remain <= offset) {
> + if (remain <= offset) {
> break;
> }
> remain -= offset;
^ permalink raw reply [flat|nested] 20+ messages in thread