* [PATCH 01/12] accel/tcg: Move {set, clear}_helper_retaddr to cpu_ldst.h
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 02/12] target/arm: Use set/clear_helper_retaddr in helper-a64.c Richard Henderson
` (11 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell
Use of these in helpers goes hand-in-hand with tlb_vaddr_to_host
and other probing functions.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/user-retaddr.h | 28 ----------------------------
include/exec/cpu_ldst.h | 34 ++++++++++++++++++++++++++++++++++
accel/tcg/cpu-exec.c | 3 ---
accel/tcg/user-exec.c | 1 -
4 files changed, 34 insertions(+), 32 deletions(-)
delete mode 100644 accel/tcg/user-retaddr.h
diff --git a/accel/tcg/user-retaddr.h b/accel/tcg/user-retaddr.h
deleted file mode 100644
index e0f57e1994..0000000000
--- a/accel/tcg/user-retaddr.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef ACCEL_TCG_USER_RETADDR_H
-#define ACCEL_TCG_USER_RETADDR_H
-
-#include "qemu/atomic.h"
-
-extern __thread uintptr_t helper_retaddr;
-
-static inline void set_helper_retaddr(uintptr_t ra)
-{
- helper_retaddr = ra;
- /*
- * Ensure that this write is visible to the SIGSEGV handler that
- * may be invoked due to a subsequent invalid memory operation.
- */
- signal_barrier();
-}
-
-static inline void clear_helper_retaddr(void)
-{
- /*
- * Ensure that previous memory operations have succeeded before
- * removing the data visible to the signal handler.
- */
- signal_barrier();
- helper_retaddr = 0;
-}
-
-#endif
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 71009f84f5..dac12bd8eb 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -379,4 +379,38 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
MMUAccessType access_type, int mmu_idx);
#endif
+/*
+ * For user-only, helpers that use guest to host address translation
+ * must protect the actual host memory access by recording 'retaddr'
+ * for the signal handler. This is required for a race condition in
+ * which another thread unmaps the page between a probe and the
+ * actual access.
+ */
+#ifdef CONFIG_USER_ONLY
+extern __thread uintptr_t helper_retaddr;
+
+static inline void set_helper_retaddr(uintptr_t ra)
+{
+ helper_retaddr = ra;
+ /*
+ * Ensure that this write is visible to the SIGSEGV handler that
+ * may be invoked due to a subsequent invalid memory operation.
+ */
+ signal_barrier();
+}
+
+static inline void clear_helper_retaddr(void)
+{
+ /*
+ * Ensure that previous memory operations have succeeded before
+ * removing the data visible to the signal handler.
+ */
+ signal_barrier();
+ helper_retaddr = 0;
+}
+#else
+#define set_helper_retaddr(ra) do { } while (0)
+#define clear_helper_retaddr() do { } while (0)
+#endif
+
#endif /* CPU_LDST_H */
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 9010dad073..8163295f34 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -41,9 +41,6 @@
#include "tb-context.h"
#include "internal-common.h"
#include "internal-target.h"
-#if defined(CONFIG_USER_ONLY)
-#include "user-retaddr.h"
-#endif
/* -icount align implementation. */
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 80d24540ed..7ddc47b0ba 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -33,7 +33,6 @@
#include "tcg/tcg-ldst.h"
#include "internal-common.h"
#include "internal-target.h"
-#include "user-retaddr.h"
__thread uintptr_t helper_retaddr;
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 02/12] target/arm: Use set/clear_helper_retaddr in helper-a64.c
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
2024-07-23 3:34 ` [PATCH 01/12] accel/tcg: Move {set, clear}_helper_retaddr to cpu_ldst.h Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 03/12] target/arm: Use set/clear_helper_retaddr in SVE and SME helpers Richard Henderson
` (10 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell
Use these in helper_dc_dva and the FEAT_MOPS routines to
avoid a race condition with munmap in another thread.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper-a64.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 0ea8668ab4..c60d2a7ec9 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -928,6 +928,8 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
+ uintptr_t ra = GETPC();
+
/*
* Implement DC ZVA, which zeroes a fixed-length block of memory.
* Note that we do not implement the (architecturally mandated)
@@ -948,8 +950,6 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
#ifndef CONFIG_USER_ONLY
if (unlikely(!mem)) {
- uintptr_t ra = GETPC();
-
/*
* Trap if accessing an invalid page. DC_ZVA requires that we supply
* the original pointer for an invalid page. But watchpoints require
@@ -971,7 +971,9 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
}
#endif
+ set_helper_retaddr(ra);
memset(mem, 0, blocklen);
+ clear_helper_retaddr();
}
void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
@@ -1120,7 +1122,9 @@ static uint64_t set_step(CPUARMState *env, uint64_t toaddr,
}
#endif
/* Easy case: just memset the host memory */
+ set_helper_retaddr(ra);
memset(mem, data, setsize);
+ clear_helper_retaddr();
return setsize;
}
@@ -1163,7 +1167,9 @@ static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr,
}
#endif
/* Easy case: just memset the host memory */
+ set_helper_retaddr(ra);
memset(mem, data, setsize);
+ clear_helper_retaddr();
mte_mops_set_tags(env, toaddr, setsize, *mtedesc);
return setsize;
}
@@ -1497,7 +1503,9 @@ static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr,
}
#endif
/* Easy case: just memmove the host memory */
+ set_helper_retaddr(ra);
memmove(wmem, rmem, copysize);
+ clear_helper_retaddr();
return copysize;
}
@@ -1572,7 +1580,9 @@ static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr,
* Easy case: just memmove the host memory. Note that wmem and
* rmem here point to the *last* byte to copy.
*/
+ set_helper_retaddr(ra);
memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize);
+ clear_helper_retaddr();
return copysize;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 03/12] target/arm: Use set/clear_helper_retaddr in SVE and SME helpers
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
2024-07-23 3:34 ` [PATCH 01/12] accel/tcg: Move {set, clear}_helper_retaddr to cpu_ldst.h Richard Henderson
2024-07-23 3:34 ` [PATCH 02/12] target/arm: Use set/clear_helper_retaddr in helper-a64.c Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 04/12] target/ppc/mem_helper.c: Remove a conditional from dcbz_common() Richard Henderson
` (9 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell
Avoid a race condition with munmap in another thread.
Use around blocks that exclusively use "host_fn".
Keep the blocks as small as possible, but without setting
and clearing for every operation on one page.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sme_helper.c | 16 ++++++++++++++
target/arm/tcg/sve_helper.c | 42 +++++++++++++++++++++++++++++--------
2 files changed, 49 insertions(+), 9 deletions(-)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 5a6dd76489..50bb088d04 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -517,6 +517,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
clr_fn(za, 0, reg_off);
}
+ set_helper_retaddr(ra);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -529,6 +531,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -543,6 +547,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(ra);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -554,6 +560,8 @@ void sme_ld1(CPUARMState *env, void *za, uint64_t *vg,
reg_off += esize;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -701,6 +709,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(ra);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -711,6 +721,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -725,6 +737,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(ra);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -734,6 +748,8 @@ void sme_st1(CPUARMState *env, void *za, uint64_t *vg,
reg_off += 1 << esz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index dd49e67d7a..f1ee0e060f 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5738,6 +5738,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -5752,6 +5754,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -5771,6 +5775,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -5784,6 +5790,8 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -5934,15 +5942,11 @@ DO_LDN_2(4, dd, MO_64)
/*
* Load contiguous data, first-fault and no-fault.
*
- * For user-only, one could argue that we should hold the mmap_lock during
- * the operation so that there is no race between page_check_range and the
- * load operation. However, unmapping pages out from under a running thread
- * is extraordinarily unlikely. This theoretical race condition also affects
- * linux-user/ in its get_user/put_user macros.
- *
- * TODO: Construct some helpers, written in assembly, that interact with
- * host_signal_handler to produce memory ops which can properly report errors
- * without racing.
+ * For user-only, we control the race between page_check_range and
+ * another thread's munmap by using set/clear_helper_retaddr. Any
+ * SEGV that occurs between those markers is assumed to be because
+ * the guest page vanished. Keep that block as small as possible
+ * so that unrelated QEMU bugs are not blamed on the guest.
*/
/* Fault on byte I. All bits in FFR from I are cleared. The vector
@@ -6093,6 +6097,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = *(uint64_t *)(vg + (reg_off >> 3));
do {
@@ -6101,9 +6107,11 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
(cpu_watchpoint_address_matches
(env_cpu(env), addr + mem_off, 1 << msz)
& BP_MEM_READ)) {
+ clear_helper_retaddr();
goto do_fault;
}
if (mtedesc && !mte_probe(env, mtedesc, addr + mem_off)) {
+ clear_helper_retaddr();
goto do_fault;
}
host_fn(vd, reg_off, host + mem_off);
@@ -6113,6 +6121,8 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
} while (reg_off <= reg_last);
+ clear_helper_retaddr();
+
/*
* MemSingleNF is allowed to fail for any reason. We have special
* code above to handle the first element crossing a page boundary.
@@ -6348,6 +6358,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
reg_last = info.reg_off_last[0];
host = info.page[0].host;
+ set_helper_retaddr(retaddr);
+
while (reg_off <= reg_last) {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -6362,6 +6374,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
} while (reg_off <= reg_last && (reg_off & 63));
}
+ clear_helper_retaddr();
+
/*
* Use the slow path to manage the cross-page misalignment.
* But we know this is RAM and cannot trap.
@@ -6381,6 +6395,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
reg_last = info.reg_off_last[1];
host = info.page[1].host;
+ set_helper_retaddr(retaddr);
+
do {
uint64_t pg = vg[reg_off >> 6];
do {
@@ -6394,6 +6410,8 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
mem_off += N << msz;
} while (reg_off & 63);
} while (reg_off <= reg_last);
+
+ clear_helper_retaddr();
}
}
@@ -6560,7 +6578,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
if (unlikely(info.flags & TLB_MMIO)) {
tlb_fn(env, &scratch, reg_off, addr, retaddr);
} else {
+ set_helper_retaddr(retaddr);
host_fn(&scratch, reg_off, info.host);
+ clear_helper_retaddr();
}
} else {
/* Element crosses the page boundary. */
@@ -6782,7 +6802,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
goto fault;
}
+ set_helper_retaddr(retaddr);
host_fn(vd, reg_off, info.host);
+ clear_helper_retaddr();
}
reg_off += esize;
} while (reg_off & 63);
@@ -6986,7 +7008,9 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
do {
void *h = host[i];
if (likely(h != NULL)) {
+ set_helper_retaddr(retaddr);
host_fn(vd, reg_off, h);
+ clear_helper_retaddr();
} else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
target_ulong addr = base + (off_fn(vm, reg_off) << scale);
tlb_fn(env, vd, reg_off, addr, retaddr);
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 04/12] target/ppc/mem_helper.c: Remove a conditional from dcbz_common()
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (2 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 03/12] target/arm: Use set/clear_helper_retaddr in SVE and SME helpers Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 05/12] target/ppc: Hoist dcbz_size out of dcbz_common Richard Henderson
` (8 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: BALATON Zoltan, Nicholas Piggin
From: BALATON Zoltan <balaton@eik.bme.hu>
Instead of passing a bool and select a value within dcbz_common() let
the callers pass in the right value to avoid this conditional
statement. On PPC dcbz is often used to zero memory and some code uses
it a lot. This change improves the run time of a test case that copies
memory with a dcbz call in every iteration from 6.23 to 5.83 seconds.
Signed-off-by: BALATON Zoltan <balaton@eik.bme.hu>
Message-Id: <20240622204833.5F7C74E6000@zero.eik.bme.hu>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
target/ppc/mem_helper.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index f88155ad45..361fd72226 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -271,12 +271,11 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
}
static void dcbz_common(CPUPPCState *env, target_ulong addr,
- uint32_t opcode, bool epid, uintptr_t retaddr)
+ uint32_t opcode, int mmu_idx, uintptr_t retaddr)
{
target_ulong mask, dcbz_size = env->dcache_line_size;
uint32_t i;
void *haddr;
- int mmu_idx = epid ? PPC_TLB_EPID_STORE : ppc_env_mmu_index(env, false);
#if defined(TARGET_PPC64)
/* Check for dcbz vs dcbzl on 970 */
@@ -309,12 +308,12 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode)
{
- dcbz_common(env, addr, opcode, false, GETPC());
+ dcbz_common(env, addr, opcode, ppc_env_mmu_index(env, false), GETPC());
}
void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode)
{
- dcbz_common(env, addr, opcode, true, GETPC());
+ dcbz_common(env, addr, opcode, PPC_TLB_EPID_STORE, GETPC());
}
void helper_icbi(CPUPPCState *env, target_ulong addr)
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 05/12] target/ppc: Hoist dcbz_size out of dcbz_common
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (3 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 04/12] target/ppc/mem_helper.c: Remove a conditional from dcbz_common() Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 06/12] target/ppc: Split out helper_dbczl for 970 Richard Henderson
` (7 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Nicholas Piggin, BALATON Zoltan
The 970 logic does not apply to dcbzep, which is an e500 insn.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/mem_helper.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 361fd72226..5067919ff8 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -271,22 +271,12 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
}
static void dcbz_common(CPUPPCState *env, target_ulong addr,
- uint32_t opcode, int mmu_idx, uintptr_t retaddr)
+ int dcbz_size, int mmu_idx, uintptr_t retaddr)
{
- target_ulong mask, dcbz_size = env->dcache_line_size;
- uint32_t i;
+ target_ulong mask = ~(target_ulong)(dcbz_size - 1);
void *haddr;
-#if defined(TARGET_PPC64)
- /* Check for dcbz vs dcbzl on 970 */
- if (env->excp_model == POWERPC_EXCP_970 &&
- !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
- dcbz_size = 32;
- }
-#endif
-
/* Align address */
- mask = ~(dcbz_size - 1);
addr &= mask;
/* Check reservation */
@@ -300,7 +290,7 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
memset(haddr, 0, dcbz_size);
} else {
/* Slow path */
- for (i = 0; i < dcbz_size; i += 8) {
+ for (int i = 0; i < dcbz_size; i += 8) {
cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr);
}
}
@@ -308,12 +298,22 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode)
{
- dcbz_common(env, addr, opcode, ppc_env_mmu_index(env, false), GETPC());
+ int dcbz_size = env->dcache_line_size;
+
+#if defined(TARGET_PPC64)
+ /* Check for dcbz vs dcbzl on 970 */
+ if (env->excp_model == POWERPC_EXCP_970 &&
+ !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
+ dcbz_size = 32;
+ }
+#endif
+
+ dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC());
}
void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode)
{
- dcbz_common(env, addr, opcode, PPC_TLB_EPID_STORE, GETPC());
+ dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC());
}
void helper_icbi(CPUPPCState *env, target_ulong addr)
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 06/12] target/ppc: Split out helper_dbczl for 970
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (4 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 05/12] target/ppc: Hoist dcbz_size out of dcbz_common Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 07/12] target/ppc: Merge helper_{dcbz,dcbzep} Richard Henderson
` (6 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Nicholas Piggin, BALATON Zoltan
We can determine at translation time whether the insn is or
is not dbczl. We must retain a runtime check against the
HID5 register, but we can move that to a separate function
that never affects other ppc models.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: BALATON Zoltan <balaton@eik.bme.hu>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/helper.h | 7 +++++--
target/ppc/mem_helper.c | 34 +++++++++++++++++++++-------------
target/ppc/translate.c | 24 ++++++++++++++----------
3 files changed, 40 insertions(+), 25 deletions(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 76b8f25c77..afc56855ff 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -46,8 +46,11 @@ DEF_HELPER_FLAGS_3(stmw, TCG_CALL_NO_WG, void, env, tl, i32)
DEF_HELPER_4(lsw, void, env, tl, i32, i32)
DEF_HELPER_5(lswx, void, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_4(stsw, TCG_CALL_NO_WG, void, env, tl, i32, i32)
-DEF_HELPER_FLAGS_3(dcbz, TCG_CALL_NO_WG, void, env, tl, i32)
-DEF_HELPER_FLAGS_3(dcbzep, TCG_CALL_NO_WG, void, env, tl, i32)
+DEF_HELPER_FLAGS_2(dcbz, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_2(dcbzep, TCG_CALL_NO_WG, void, env, tl)
+#ifdef TARGET_PPC64
+DEF_HELPER_FLAGS_2(dcbzl, TCG_CALL_NO_WG, void, env, tl)
+#endif
DEF_HELPER_FLAGS_2(icbi, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 5067919ff8..d4957efd6e 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -296,26 +296,34 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
}
}
-void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode)
+void helper_dcbz(CPUPPCState *env, target_ulong addr)
{
- int dcbz_size = env->dcache_line_size;
-
-#if defined(TARGET_PPC64)
- /* Check for dcbz vs dcbzl on 970 */
- if (env->excp_model == POWERPC_EXCP_970 &&
- !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
- dcbz_size = 32;
- }
-#endif
-
- dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC());
+ dcbz_common(env, addr, env->dcache_line_size,
+ ppc_env_mmu_index(env, false), GETPC());
}
-void helper_dcbzep(CPUPPCState *env, target_ulong addr, uint32_t opcode)
+void helper_dcbzep(CPUPPCState *env, target_ulong addr)
{
dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC());
}
+#ifdef TARGET_PPC64
+void helper_dcbzl(CPUPPCState *env, target_ulong addr)
+{
+ int dcbz_size = env->dcache_line_size;
+
+ /*
+ * The translator checked for POWERPC_EXCP_970.
+ * All that's left is to check HID5.
+ */
+ if (((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) {
+ dcbz_size = 32;
+ }
+
+ dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC());
+}
+#endif
+
void helper_icbi(CPUPPCState *env, target_ulong addr)
{
addr &= ~(env->dcache_line_size - 1);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 0bc16d7251..9e472ab7ef 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -178,6 +178,7 @@ struct DisasContext {
/* Translation flags */
MemOp default_tcg_memop_mask;
#if defined(TARGET_PPC64)
+ powerpc_excp_t excp_model;
bool sf_mode;
bool has_cfar;
bool has_bhrb;
@@ -4445,27 +4446,29 @@ static void gen_dcblc(DisasContext *ctx)
/* dcbz */
static void gen_dcbz(DisasContext *ctx)
{
- TCGv tcgv_addr;
- TCGv_i32 tcgv_op;
+ TCGv tcgv_addr = tcg_temp_new();
gen_set_access_type(ctx, ACCESS_CACHE);
- tcgv_addr = tcg_temp_new();
- tcgv_op = tcg_constant_i32(ctx->opcode & 0x03FF000);
gen_addr_reg_index(ctx, tcgv_addr);
- gen_helper_dcbz(tcg_env, tcgv_addr, tcgv_op);
+
+#ifdef TARGET_PPC64
+ if (ctx->excp_model == POWERPC_EXCP_970 && !(ctx->opcode & 0x00200000)) {
+ gen_helper_dcbzl(tcg_env, tcgv_addr);
+ return;
+ }
+#endif
+
+ gen_helper_dcbz(tcg_env, tcgv_addr);
}
/* dcbzep */
static void gen_dcbzep(DisasContext *ctx)
{
- TCGv tcgv_addr;
- TCGv_i32 tcgv_op;
+ TCGv tcgv_addr = tcg_temp_new();
gen_set_access_type(ctx, ACCESS_CACHE);
- tcgv_addr = tcg_temp_new();
- tcgv_op = tcg_constant_i32(ctx->opcode & 0x03FF000);
gen_addr_reg_index(ctx, tcgv_addr);
- gen_helper_dcbzep(tcg_env, tcgv_addr, tcgv_op);
+ gen_helper_dcbzep(tcg_env, tcgv_addr);
}
/* dst / dstt */
@@ -6486,6 +6489,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
ctx->default_tcg_memop_mask = ctx->le_mode ? MO_LE : MO_BE;
ctx->flags = env->flags;
#if defined(TARGET_PPC64)
+ ctx->excp_model = env->excp_model;
ctx->sf_mode = (hflags >> HFLAGS_64) & 1;
ctx->has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
ctx->has_bhrb = !!(env->flags & POWERPC_FLAG_BHRB);
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 07/12] target/ppc: Merge helper_{dcbz,dcbzep}
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (5 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 06/12] target/ppc: Split out helper_dbczl for 970 Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 08/12] target/ppc: Improve helper_dcbz for user-only Richard Henderson
` (5 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Nicholas Piggin
Merge the two and pass the mmu_idx directly from translation.
Swap the argument order in dcbz_common to avoid extra swaps.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/helper.h | 3 +--
target/ppc/mem_helper.c | 14 ++++----------
target/ppc/translate.c | 4 ++--
3 files changed, 7 insertions(+), 14 deletions(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index afc56855ff..4fa089cbf9 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -46,8 +46,7 @@ DEF_HELPER_FLAGS_3(stmw, TCG_CALL_NO_WG, void, env, tl, i32)
DEF_HELPER_4(lsw, void, env, tl, i32, i32)
DEF_HELPER_5(lswx, void, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_4(stsw, TCG_CALL_NO_WG, void, env, tl, i32, i32)
-DEF_HELPER_FLAGS_2(dcbz, TCG_CALL_NO_WG, void, env, tl)
-DEF_HELPER_FLAGS_2(dcbzep, TCG_CALL_NO_WG, void, env, tl)
+DEF_HELPER_FLAGS_3(dcbz, TCG_CALL_NO_WG, void, env, tl, int)
#ifdef TARGET_PPC64
DEF_HELPER_FLAGS_2(dcbzl, TCG_CALL_NO_WG, void, env, tl)
#endif
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index d4957efd6e..24bae3b80c 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -271,7 +271,7 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
}
static void dcbz_common(CPUPPCState *env, target_ulong addr,
- int dcbz_size, int mmu_idx, uintptr_t retaddr)
+ int mmu_idx, int dcbz_size, uintptr_t retaddr)
{
target_ulong mask = ~(target_ulong)(dcbz_size - 1);
void *haddr;
@@ -296,15 +296,9 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
}
}
-void helper_dcbz(CPUPPCState *env, target_ulong addr)
+void helper_dcbz(CPUPPCState *env, target_ulong addr, int mmu_idx)
{
- dcbz_common(env, addr, env->dcache_line_size,
- ppc_env_mmu_index(env, false), GETPC());
-}
-
-void helper_dcbzep(CPUPPCState *env, target_ulong addr)
-{
- dcbz_common(env, addr, env->dcache_line_size, PPC_TLB_EPID_STORE, GETPC());
+ dcbz_common(env, addr, mmu_idx, env->dcache_line_size, GETPC());
}
#ifdef TARGET_PPC64
@@ -320,7 +314,7 @@ void helper_dcbzl(CPUPPCState *env, target_ulong addr)
dcbz_size = 32;
}
- dcbz_common(env, addr, dcbz_size, ppc_env_mmu_index(env, false), GETPC());
+ dcbz_common(env, addr, ppc_env_mmu_index(env, false), dcbz_size, GETPC());
}
#endif
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 9e472ab7ef..cba943a49d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -4458,7 +4458,7 @@ static void gen_dcbz(DisasContext *ctx)
}
#endif
- gen_helper_dcbz(tcg_env, tcgv_addr);
+ gen_helper_dcbz(tcg_env, tcgv_addr, tcg_constant_i32(ctx->mem_idx));
}
/* dcbzep */
@@ -4468,7 +4468,7 @@ static void gen_dcbzep(DisasContext *ctx)
gen_set_access_type(ctx, ACCESS_CACHE);
gen_addr_reg_index(ctx, tcgv_addr);
- gen_helper_dcbzep(tcg_env, tcgv_addr);
+ gen_helper_dcbz(tcg_env, tcgv_addr, tcg_constant_i32(PPC_TLB_EPID_STORE));
}
/* dst / dstt */
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 08/12] target/ppc: Improve helper_dcbz for user-only
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (6 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 07/12] target/ppc: Merge helper_{dcbz,dcbzep} Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 09/12] target/s390x: Use user_or_likely in do_access_memset Richard Henderson
` (4 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell
Mark the reserve_addr check unlikely. Use tlb_vaddr_to_host
instead of probe_write, relying on the memset itself to test
for page writability. Use set/clear_helper_retaddr so that
we can properly unwind on segfault.
With this, a trivial loop around guest memset will no longer
spend nearly 25% of runtime within page_get_flags.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/ppc/mem_helper.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 24bae3b80c..953dd08d5d 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -280,20 +280,27 @@ static void dcbz_common(CPUPPCState *env, target_ulong addr,
addr &= mask;
/* Check reservation */
- if ((env->reserve_addr & mask) == addr) {
+ if (unlikely((env->reserve_addr & mask) == addr)) {
env->reserve_addr = (target_ulong)-1ULL;
}
/* Try fast path translate */
+#ifdef CONFIG_USER_ONLY
+ haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx);
+#else
haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
- if (haddr) {
- memset(haddr, 0, dcbz_size);
- } else {
+ if (unlikely(!haddr)) {
/* Slow path */
for (int i = 0; i < dcbz_size; i += 8) {
cpu_stq_mmuidx_ra(env, addr + i, 0, mmu_idx, retaddr);
}
+ return;
}
+#endif
+
+ set_helper_retaddr(retaddr);
+ memset(haddr, 0, dcbz_size);
+ clear_helper_retaddr();
}
void helper_dcbz(CPUPPCState *env, target_ulong addr, int mmu_idx)
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 09/12] target/s390x: Use user_or_likely in do_access_memset
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (7 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 08/12] target/ppc: Improve helper_dcbz for user-only Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 10/12] target/s390x: Use user_or_likely in access_memmove Richard Henderson
` (3 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell
Eliminate the ifdef by using a predicate that is
always true with CONFIG_USER_ONLY.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/tcg/mem_helper.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 6cdbc34178..5311a15a09 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -225,10 +225,7 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
uint8_t byte, uint16_t size, int mmu_idx,
uintptr_t ra)
{
-#ifdef CONFIG_USER_ONLY
- memset(haddr, byte, size);
-#else
- if (likely(haddr)) {
+ if (user_or_likely(haddr)) {
memset(haddr, byte, size);
} else {
MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
@@ -236,7 +233,6 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
}
}
-#endif
}
static void access_memset(CPUS390XState *env, S390Access *desta,
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 10/12] target/s390x: Use user_or_likely in access_memmove
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (8 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 09/12] target/s390x: Use user_or_likely in do_access_memset Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 11/12] target/s390x: Use set/clear_helper_retaddr in mem_helper.c Richard Henderson
` (2 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell
Invert the conditional, indent the block, and use the macro
that expands to true for user-only.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/tcg/mem_helper.c | 54 +++++++++++++++++------------------
1 file changed, 26 insertions(+), 28 deletions(-)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 5311a15a09..331a35b2e5 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -296,41 +296,39 @@ static void access_memmove(CPUS390XState *env, S390Access *desta,
S390Access *srca, uintptr_t ra)
{
int len = desta->size1 + desta->size2;
- int diff;
assert(len == srca->size1 + srca->size2);
/* Fallback to slow access in case we don't have access to all host pages */
- if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
- !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
- int i;
+ if (user_or_likely(desta->haddr1 &&
+ srca->haddr1 &&
+ (!desta->size2 || desta->haddr2) &&
+ (!srca->size2 || srca->haddr2))) {
+ int diff = desta->size1 - srca->size1;
- for (i = 0; i < len; i++) {
- uint8_t byte = access_get_byte(env, srca, i, ra);
-
- access_set_byte(env, desta, i, byte, ra);
- }
- return;
- }
-
- diff = desta->size1 - srca->size1;
- if (likely(diff == 0)) {
- memmove(desta->haddr1, srca->haddr1, srca->size1);
- if (unlikely(srca->size2)) {
- memmove(desta->haddr2, srca->haddr2, srca->size2);
- }
- } else if (diff > 0) {
- memmove(desta->haddr1, srca->haddr1, srca->size1);
- memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
- if (likely(desta->size2)) {
- memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
+ if (likely(diff == 0)) {
+ memmove(desta->haddr1, srca->haddr1, srca->size1);
+ if (unlikely(srca->size2)) {
+ memmove(desta->haddr2, srca->haddr2, srca->size2);
+ }
+ } else if (diff > 0) {
+ memmove(desta->haddr1, srca->haddr1, srca->size1);
+ memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
+ if (likely(desta->size2)) {
+ memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
+ }
+ } else {
+ diff = -diff;
+ memmove(desta->haddr1, srca->haddr1, desta->size1);
+ memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
+ if (likely(srca->size2)) {
+ memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
+ }
}
} else {
- diff = -diff;
- memmove(desta->haddr1, srca->haddr1, desta->size1);
- memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
- if (likely(srca->size2)) {
- memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
+ for (int i = 0; i < len; i++) {
+ uint8_t byte = access_get_byte(env, srca, i, ra);
+ access_set_byte(env, desta, i, byte, ra);
}
}
}
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 11/12] target/s390x: Use set/clear_helper_retaddr in mem_helper.c
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (9 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 10/12] target/s390x: Use user_or_likely in access_memmove Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 3:34 ` [PATCH 12/12] target/riscv: Simplify probing in vext_ldff Richard Henderson
2024-07-23 14:59 ` [PULL 00/12] tcg patch queue Richard Henderson
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Peter Maydell
Avoid a race condition with munmap in another thread.
For access_memset and access_memmove, manage the value
within the helper. For uses of access_{get,set}_byte,
manage the value across the for loops.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/tcg/mem_helper.c | 43 ++++++++++++++++++++++++++++++-----
1 file changed, 37 insertions(+), 6 deletions(-)
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 331a35b2e5..0e12dae2aa 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -238,14 +238,14 @@ static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
static void access_memset(CPUS390XState *env, S390Access *desta,
uint8_t byte, uintptr_t ra)
{
-
+ set_helper_retaddr(ra);
do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
desta->mmu_idx, ra);
- if (likely(!desta->size2)) {
- return;
+ if (unlikely(desta->size2)) {
+ do_access_memset(env, desta->vaddr2, desta->haddr2, byte,
+ desta->size2, desta->mmu_idx, ra);
}
- do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
- desta->mmu_idx, ra);
+ clear_helper_retaddr();
}
static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
@@ -366,6 +366,8 @@ static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+ set_helper_retaddr(ra);
+
for (i = 0; i < l; i++) {
const uint8_t x = access_get_byte(env, &srca1, i, ra) &
access_get_byte(env, &srca2, i, ra);
@@ -373,6 +375,8 @@ static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
c |= x;
access_set_byte(env, &desta, i, x, ra);
}
+
+ clear_helper_retaddr();
return c != 0;
}
@@ -407,6 +411,7 @@ static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
return 0;
}
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
access_get_byte(env, &srca2, i, ra);
@@ -414,6 +419,7 @@ static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
c |= x;
access_set_byte(env, &desta, i, x, ra);
}
+ clear_helper_retaddr();
return c != 0;
}
@@ -441,6 +447,8 @@ static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+ set_helper_retaddr(ra);
+
for (i = 0; i < l; i++) {
const uint8_t x = access_get_byte(env, &srca1, i, ra) |
access_get_byte(env, &srca2, i, ra);
@@ -448,6 +456,8 @@ static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
c |= x;
access_set_byte(env, &desta, i, x, ra);
}
+
+ clear_helper_retaddr();
return c != 0;
}
@@ -484,11 +494,13 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
} else if (!is_destructive_overlap(env, dest, src, l)) {
access_memmove(env, &desta, &srca, ra);
} else {
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
uint8_t byte = access_get_byte(env, &srca, i, ra);
access_set_byte(env, &desta, i, byte, ra);
}
+ clear_helper_retaddr();
}
return env->cc_op;
@@ -514,10 +526,12 @@ void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+ set_helper_retaddr(ra);
for (i = l - 1; i >= 0; i--) {
uint8_t byte = access_get_byte(env, &srca, i, ra);
access_set_byte(env, &desta, i, byte, ra);
}
+ clear_helper_retaddr();
}
/* move inverse */
@@ -534,11 +548,13 @@ void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
src = wrap_address(env, src - l + 1);
access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
-
access_set_byte(env, &desta, i, x, ra);
}
+ clear_helper_retaddr();
}
/* move numerics */
@@ -555,12 +571,15 @@ void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
(access_get_byte(env, &srca2, i, ra) & 0xf0);
access_set_byte(env, &desta, i, x, ra);
}
+ clear_helper_retaddr();
}
/* move with offset */
@@ -580,6 +599,8 @@ void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
/* Handle rightmost byte */
byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
+
+ set_helper_retaddr(ra);
byte_src = access_get_byte(env, &srca, len_src - 1, ra);
byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
@@ -595,6 +616,7 @@ void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
byte_dest |= byte_src << 4;
access_set_byte(env, &desta, i, byte_dest, ra);
}
+ clear_helper_retaddr();
}
/* move zones */
@@ -611,12 +633,15 @@ void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+
+ set_helper_retaddr(ra);
for (i = 0; i < l; i++) {
const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
(access_get_byte(env, &srca2, i, ra) & 0x0f);
access_set_byte(env, &desta, i, x, ra);
}
+ clear_helper_retaddr();
}
/* compare unsigned byte arrays */
@@ -961,15 +986,19 @@ uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
*/
access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
+
+ set_helper_retaddr(ra);
for (i = 0; i < len; i++) {
const uint8_t v = access_get_byte(env, &srca, i, ra);
access_set_byte(env, &desta, i, v, ra);
if (v == c) {
+ clear_helper_retaddr();
set_address_zero(env, r1, d + i);
return 1;
}
}
+ clear_helper_retaddr();
set_address_zero(env, r1, d + len);
set_address_zero(env, r2, s + len);
return 3;
@@ -1060,6 +1089,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
*dest = wrap_address(env, *dest + len);
} else {
access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
+ set_helper_retaddr(ra);
/* The remaining length selects the padding byte. */
for (i = 0; i < len; (*destlen)--, i++) {
@@ -1069,6 +1099,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
access_set_byte(env, &desta, i, pad >> 8, ra);
}
}
+ clear_helper_retaddr();
*dest = wrap_address(env, *dest + len);
}
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 12/12] target/riscv: Simplify probing in vext_ldff
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (10 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 11/12] target/s390x: Use set/clear_helper_retaddr in mem_helper.c Richard Henderson
@ 2024-07-23 3:34 ` Richard Henderson
2024-07-23 14:59 ` [PULL 00/12] tcg patch queue Richard Henderson
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 3:34 UTC (permalink / raw)
To: qemu-devel; +Cc: Max Chou, Alistair Francis
The current pairing of tlb_vaddr_to_host with extra is either
inefficient (user-only, with page_check_range) or incorrect
(system, with probe_pages).
For proper non-fault behaviour, use probe_access_flags with
its nonfault parameter set to true.
Reviewed-by: Max Chou <max.chou@sifive.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/riscv/vector_helper.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 1b4d5a8e37..10a52ceb5b 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -474,7 +474,6 @@ vext_ldff(void *vd, void *v0, target_ulong base,
vext_ldst_elem_fn *ldst_elem,
uint32_t log2_esz, uintptr_t ra)
{
- void *host;
uint32_t i, k, vl = 0;
uint32_t nf = vext_nf(desc);
uint32_t vm = vext_vm(desc);
@@ -493,27 +492,31 @@ vext_ldff(void *vd, void *v0, target_ulong base,
}
addr = adjust_addr(env, base + i * (nf << log2_esz));
if (i == 0) {
+ /* Allow fault on first element. */
probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
} else {
- /* if it triggers an exception, no need to check watchpoint */
remain = nf << log2_esz;
while (remain > 0) {
+ void *host;
+ int flags;
+
offset = -(addr | TARGET_PAGE_MASK);
- host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
- if (host) {
-#ifdef CONFIG_USER_ONLY
- if (!page_check_range(addr, offset, PAGE_READ)) {
- vl = i;
- goto ProbeSuccess;
- }
-#else
- probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
-#endif
- } else {
+
+ /* Probe nonfault on subsequent elements. */
+ flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
+ mmu_index, true, &host, 0);
+
+ /*
+ * Stop if invalid (unmapped) or mmio (transaction may fail).
+ * Do not stop if watchpoint, as the spec says that
+ * first-fault should continue to access the same
+ * elements regardless of any watchpoint.
+ */
+ if (flags & ~TLB_WATCHPOINT) {
vl = i;
goto ProbeSuccess;
}
- if (remain <= offset) {
+ if (remain <= offset) {
break;
}
remain -= offset;
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PULL 00/12] tcg patch queue
2024-07-23 3:34 [PATCH 00/12] tcg patch queue Richard Henderson
` (11 preceding siblings ...)
2024-07-23 3:34 ` [PATCH 12/12] target/riscv: Simplify probing in vext_ldff Richard Henderson
@ 2024-07-23 14:59 ` Richard Henderson
12 siblings, 0 replies; 14+ messages in thread
From: Richard Henderson @ 2024-07-23 14:59 UTC (permalink / raw)
To: qemu-devel
On 7/23/24 13:34, Richard Henderson wrote:
> The following changes since commit a7ddb48bd1363c8bcdf42776d320289c42191f01:
>
> Merge tag 'pull-aspeed-20240721' ofhttps://github.com/legoater/qemu into staging (2024-07-22 07:52:05 +1000)
>
> are available in the Git repository at:
>
> https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20240723
>
> for you to fetch changes up to 3f57638a7eae5b56f65224c680654a2aaaa09379:
>
> target/riscv: Simplify probing in vext_ldff (2024-07-23 10:57:42 +1000)
>
> ----------------------------------------------------------------
> accel/tcg: Export set/clear_helper_retaddr
> target/arm: Use set_helper_retaddr for dc_zva, sve and sme
> target/ppc: Tidy dcbz helpers
> target/ppc: Use set_helper_retaddr for dcbz
> target/s390x: Use set_helper_retaddr in mem_helper.c
Applied, thanks. Please update https://wiki.qemu.org/ChangeLog/9.1 as appropriate.
r~
^ permalink raw reply [flat|nested] 14+ messages in thread