* [Qemu-devel] [PATCH v4 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state
@ 2019-08-06 0:05 Richard Henderson
2019-08-06 0:05 ` [Qemu-devel] [PATCH v4 1/3] target/arm: Split out recompute_hflags et al Richard Henderson
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Richard Henderson @ 2019-08-06 0:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
Version 3 was back in February:
https://lists.gnu.org/archive/html/qemu-devel/2019-02/msg06002.html
Changes since v3:
* Rebase.
* Do not cache XSCALE_CPAR now that it overlaps VECSTRIDE.
* Leave the new v7m bits as uncached. I haven't figured
out all of the ways fpccr is modified.
Changes since v2:
* Do not cache VECLEN, VECSTRIDE, VFPEN.
These variables come from VFP_FPSCR and VFP_FPEXC, not from
system control registers.
* Move HANDLER and STACKCHECK to rebuild_hflags_a32,
instead of building them in rebuild_hflags_common.
Changes since v1:
* Apparently I had started a last-minute API change, failed to
convert all of the users, and also failed to re-test afterward.
* Retain assertions for --enable-debug-tcg.
r~
Richard Henderson (3):
target/arm: Split out recompute_hflags et al
target/arm: Rebuild hflags at EL changes and MSR writes
target/arm: Rely on hflags correct in cpu_get_tb_cpu_state
target/arm/cpu.h | 35 ++--
target/arm/helper.h | 3 +
target/arm/internals.h | 3 +
linux-user/syscall.c | 1 +
target/arm/cpu.c | 1 +
target/arm/helper-a64.c | 3 +
target/arm/helper.c | 334 ++++++++++++++++++++++---------------
target/arm/machine.c | 1 +
target/arm/op_helper.c | 1 +
target/arm/translate-a64.c | 6 +-
target/arm/translate.c | 14 +-
11 files changed, 254 insertions(+), 148 deletions(-)
--
2.17.1
* [Qemu-devel] [PATCH v4 1/3] target/arm: Split out recompute_hflags et al
2019-08-06 0:05 [Qemu-devel] [PATCH v4 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state Richard Henderson
@ 2019-08-06 0:05 ` Richard Henderson
2019-08-06 0:05 ` [Qemu-devel] [PATCH v4 2/3] target/arm: Rebuild hflags at EL changes and MSR writes Richard Henderson
2019-08-06 0:05 ` [Qemu-devel] [PATCH v4 3/3] target/arm: Rely on hflags correct in cpu_get_tb_cpu_state Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2019-08-06 0:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
Create functions to compute the values of the a64 and a32 hflags,
as well as one to compute the values that are shared between them.
For now, the env->hflags variable is not used, and the results are
fed back to cpu_get_tb_cpu_state.
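As a rough sketch, the shape of cpu_get_tb_cpu_state() after this patch
is the following (simplified from the full hunk below: the M-profile,
XScale and single-step handling is omitted, the _sketch name is
illustrative, and this only compiles in the context of
target/arm/helper.c):

static void cpu_get_tb_cpu_state_sketch(CPUARMState *env, target_ulong *pc,
                                        target_ulong *cs_base, uint32_t *pflags)
{
    int el = arm_current_el(env);
    uint32_t flags;

    if (is_a64(env)) {
        *pc = env->pc;
        /* Bits that will eventually live in env->hflags, rebuilt here. */
        flags = rebuild_hflags_a64(env, el);
        /* Bits that change too often to cache are layered on afterward. */
        flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
    } else {
        *pc = env->regs[15];
        flags = rebuild_hflags_a32(env, el);
        flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
        flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
    }

    *cs_base = 0;
    *pflags = flags;
}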
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v3: Do not cache VECLEN, VECSTRIDE, VFPEN.
Move HANDLER and STACKCHECK to rebuild_hflags_a32.
v4: Do not cache XSCALE_CPAR now that it overlaps VECSTRIDE.
---
target/arm/cpu.h | 35 +++--
target/arm/helper.h | 3 +
target/arm/internals.h | 3 +
target/arm/helper.c | 322 ++++++++++++++++++++++++-----------------
4 files changed, 218 insertions(+), 145 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 94c990cddb..c13633e6a0 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -231,6 +231,9 @@ typedef struct CPUARMState {
uint32_t pstate;
uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */
+ /* Cached TBFLAGS state. See below for which bits are included. */
+ uint32_t hflags;
+
/* Frequently accessed CPSR bits are stored separately for efficiency.
This contains all the other bits. Use cpsr_{read,write} to access
the whole CPSR. */
@@ -3130,27 +3133,31 @@ typedef ARMCPU ArchCPU;
#include "exec/cpu-all.h"
-/* Bit usage in the TB flags field: bit 31 indicates whether we are
+/*
+ * Bit usage in the TB flags field: bit 31 indicates whether we are
* in 32 or 64 bit mode. The meaning of the other bits depends on that.
* We put flags which are shared between 32 and 64 bit mode at the top
* of the word, and flags which apply to only one mode at the bottom.
+ *
+ * Unless otherwise noted, these bits are cached in env->hflags.
*/
FIELD(TBFLAG_ANY, AARCH64_STATE, 31, 1)
FIELD(TBFLAG_ANY, MMUIDX, 28, 3)
FIELD(TBFLAG_ANY, SS_ACTIVE, 27, 1)
-FIELD(TBFLAG_ANY, PSTATE_SS, 26, 1)
+FIELD(TBFLAG_ANY, PSTATE_SS, 26, 1) /* Not cached. */
/* Target EL if we take a floating-point-disabled exception */
FIELD(TBFLAG_ANY, FPEXC_EL, 24, 2)
FIELD(TBFLAG_ANY, BE_DATA, 23, 1)
/* Bit usage when in AArch32 state: */
-FIELD(TBFLAG_A32, THUMB, 0, 1)
-FIELD(TBFLAG_A32, VECLEN, 1, 3)
-FIELD(TBFLAG_A32, VECSTRIDE, 4, 2)
+FIELD(TBFLAG_A32, THUMB, 0, 1) /* Not cached. */
+FIELD(TBFLAG_A32, VECLEN, 1, 3) /* Not cached. */
+FIELD(TBFLAG_A32, VECSTRIDE, 4, 2) /* Not cached. */
/*
* We store the bottom two bits of the CPAR as TB flags and handle
* checks on the other bits at runtime. This shares the same bits as
* VECSTRIDE, which is OK as no XScale CPU has VFP.
+ * Not cached, because VECLEN+VECSTRIDE are not cached.
*/
FIELD(TBFLAG_A32, XSCALE_CPAR, 4, 2)
/*
@@ -3159,15 +3166,15 @@ FIELD(TBFLAG_A32, XSCALE_CPAR, 4, 2)
* the same thing as the current security state of the processor!
*/
FIELD(TBFLAG_A32, NS, 6, 1)
-FIELD(TBFLAG_A32, VFPEN, 7, 1)
-FIELD(TBFLAG_A32, CONDEXEC, 8, 8)
+FIELD(TBFLAG_A32, VFPEN, 7, 1) /* Not cached. */
+FIELD(TBFLAG_A32, CONDEXEC, 8, 8) /* Not cached. */
FIELD(TBFLAG_A32, SCTLR_B, 16, 1)
/* For M profile only, set if FPCCR.LSPACT is set */
-FIELD(TBFLAG_A32, LSPACT, 18, 1)
+FIELD(TBFLAG_A32, LSPACT, 18, 1) /* Not cached. */
/* For M profile only, set if we must create a new FP context */
-FIELD(TBFLAG_A32, NEW_FP_CTXT_NEEDED, 19, 1)
+FIELD(TBFLAG_A32, NEW_FP_CTXT_NEEDED, 19, 1) /* Not cached. */
/* For M profile only, set if FPCCR.S does not match current security state */
-FIELD(TBFLAG_A32, FPCCR_S_WRONG, 20, 1)
+FIELD(TBFLAG_A32, FPCCR_S_WRONG, 20, 1) /* Not cached. */
/* For M profile only, Handler (ie not Thread) mode */
FIELD(TBFLAG_A32, HANDLER, 21, 1)
/* For M profile only, whether we should generate stack-limit checks */
@@ -3179,7 +3186,7 @@ FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2)
FIELD(TBFLAG_A64, ZCR_LEN, 4, 4)
FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
FIELD(TBFLAG_A64, BT, 9, 1)
-FIELD(TBFLAG_A64, BTYPE, 10, 2)
+FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */
FIELD(TBFLAG_A64, TBID, 12, 2)
static inline bool bswap_code(bool sctlr_b)
@@ -3264,6 +3271,12 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void
*opaque);
+/**
+ * arm_rebuild_hflags:
+ * Rebuild the cached TBFLAGS for arbitrary changed processor state.
+ */
+void arm_rebuild_hflags(CPUARMState *env);
+
/**
* aa32_vfp_dreg:
* Return a pointer to the Dn register within env in 32-bit mode.
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 132aa1682e..3919acbe63 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -91,6 +91,9 @@ DEF_HELPER_4(msr_banked, void, env, i32, i32, i32)
DEF_HELPER_2(get_user_reg, i32, env, i32)
DEF_HELPER_3(set_user_reg, void, env, i32, i32)
+DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, i32)
+DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, i32)
+
DEF_HELPER_1(vfp_get_fpscr, i32, env)
DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 232d963875..db6f010798 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -992,6 +992,9 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, uint64_t va,
ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMMMUIdx mmu_idx, bool data);
+uint32_t rebuild_hflags_a32(CPUARMState *env, int el);
+uint32_t rebuild_hflags_a64(CPUARMState *env, int el);
+
static inline int exception_target_el(CPUARMState *env)
{
int target_el = MAX(1, arm_current_el(env));
diff --git a/target/arm/helper.c b/target/arm/helper.c
index b74c23a9bc..43b7c41f11 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11013,165 +11013,219 @@ ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
}
#endif
-void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
- target_ulong *cs_base, uint32_t *pflags)
+static uint32_t common_hflags(CPUARMState *env, int el, ARMMMUIdx mmu_idx,
+ int fp_el, uint32_t flags)
{
- ARMMMUIdx mmu_idx = arm_mmu_idx(env);
- int current_el = arm_current_el(env);
- int fp_el = fp_exception_el(env, current_el);
- uint32_t flags = 0;
-
- if (is_a64(env)) {
- ARMCPU *cpu = env_archcpu(env);
- uint64_t sctlr;
-
- *pc = env->pc;
- flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
-
- /* Get control bits for tagged addresses. */
- {
- ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
- ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
- int tbii, tbid;
-
- /* FIXME: ARMv8.1-VHE S2 translation regime. */
- if (regime_el(env, stage1) < 2) {
- ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
- tbid = (p1.tbi << 1) | p0.tbi;
- tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
- } else {
- tbid = p0.tbi;
- tbii = tbid & !p0.tbid;
- }
-
- flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
- flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
- }
-
- if (cpu_isar_feature(aa64_sve, cpu)) {
- int sve_el = sve_exception_el(env, current_el);
- uint32_t zcr_len;
-
- /* If SVE is disabled, but FP is enabled,
- * then the effective len is 0.
- */
- if (sve_el != 0 && fp_el == 0) {
- zcr_len = 0;
- } else {
- zcr_len = sve_zcr_len_for_el(env, current_el);
- }
- flags = FIELD_DP32(flags, TBFLAG_A64, SVEEXC_EL, sve_el);
- flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
- }
-
- sctlr = arm_sctlr(env, current_el);
-
- if (cpu_isar_feature(aa64_pauth, cpu)) {
- /*
- * In order to save space in flags, we record only whether
- * pauth is "inactive", meaning all insns are implemented as
- * a nop, or "active" when some action must be performed.
- * The decision of which action to take is left to a helper.
- */
- if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
- flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
- }
- }
-
- if (cpu_isar_feature(aa64_bti, cpu)) {
- /* Note that SCTLR_EL[23].BT == SCTLR_BT1. */
- if (sctlr & (current_el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
- flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1);
- }
- flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
- }
- } else {
- *pc = env->regs[15];
- flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
- flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
- flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE, env->vfp.vec_stride);
- flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
- flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
- flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
- if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
- || arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
- flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
- }
- /* Note that XSCALE_CPAR shares bits with VECSTRIDE */
- if (arm_feature(env, ARM_FEATURE_XSCALE)) {
- flags = FIELD_DP32(flags, TBFLAG_A32,
- XSCALE_CPAR, env->cp15.c15_cpar);
- }
- }
-
- flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX, arm_to_core_mmu_idx(mmu_idx));
-
- /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
- * states defined in the ARM ARM for software singlestep:
- * SS_ACTIVE PSTATE.SS State
- * 0 x Inactive (the TB flag for SS is always 0)
- * 1 0 Active-pending
- * 1 1 Active-not-pending
- */
- if (arm_singlestep_active(env)) {
- flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
- if (is_a64(env)) {
- if (env->pstate & PSTATE_SS) {
- flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
- }
- } else {
- if (env->uncached_cpsr & PSTATE_SS) {
- flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
- }
- }
- }
+ flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el);
+ flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX,
+ arm_to_core_mmu_idx(mmu_idx));
if (arm_cpu_data_is_big_endian(env)) {
flags = FIELD_DP32(flags, TBFLAG_ANY, BE_DATA, 1);
}
- flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el);
+ if (arm_singlestep_active(env)) {
+ flags = FIELD_DP32(flags, TBFLAG_ANY, SS_ACTIVE, 1);
+ }
+ return flags;
+}
+
+uint32_t rebuild_hflags_a32(CPUARMState *env, int el)
+{
+ uint32_t flags = 0;
+ ARMMMUIdx mmu_idx;
+ int fp_el;
+
+ flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
+ flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
if (arm_v7m_is_handler_mode(env)) {
flags = FIELD_DP32(flags, TBFLAG_A32, HANDLER, 1);
}
- /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is
- * suppressing them because the requested execution priority is less than 0.
+ mmu_idx = arm_mmu_idx(env);
+
+ /*
+ * v8M always applies stack limit checks unless CCR.STKOFHFNMIGN
+ * is suppressing them because the requested execution priority
+ * is less than 0.
*/
if (arm_feature(env, ARM_FEATURE_V8) &&
arm_feature(env, ARM_FEATURE_M) &&
- !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
+ !((mmu_idx & ARM_MMU_IDX_M_NEGPRI) &&
(env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
flags = FIELD_DP32(flags, TBFLAG_A32, STACKCHECK, 1);
}
- if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
- FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) != env->v7m.secure) {
- flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
+ fp_el = fp_exception_el(env, el);
+ return common_hflags(env, el, mmu_idx, fp_el, flags);
+}
+
+uint32_t rebuild_hflags_a64(CPUARMState *env, int el)
+{
+ ARMCPU *cpu = env_archcpu(env);
+ ARMMMUIdx mmu_idx = arm_mmu_idx(env);
+ ARMMMUIdx stage1 = stage_1_mmu_idx(mmu_idx);
+ ARMVAParameters p0 = aa64_va_parameters_both(env, 0, stage1);
+ int fp_el = fp_exception_el(env, el);
+ uint32_t flags = 0;
+ uint64_t sctlr;
+ int tbii, tbid;
+
+ flags = FIELD_DP32(flags, TBFLAG_ANY, AARCH64_STATE, 1);
+
+ /* Get control bits for tagged addresses. */
+ /* FIXME: ARMv8.1-VHE S2 translation regime. */
+ if (regime_el(env, stage1) < 2) {
+ ARMVAParameters p1 = aa64_va_parameters_both(env, -1, stage1);
+ tbid = (p1.tbi << 1) | p0.tbi;
+ tbii = tbid & ~((p1.tbid << 1) | p0.tbid);
+ } else {
+ tbid = p0.tbi;
+ tbii = tbid & !p0.tbid;
}
- if (arm_feature(env, ARM_FEATURE_M) &&
- (env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
- (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
- (env->v7m.secure &&
- !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
- /*
- * ASPEN is set, but FPCA/SFPA indicate that there is no active
- * FP context; we must create a new FP context before executing
- * any FP insn.
+ flags = FIELD_DP32(flags, TBFLAG_A64, TBII, tbii);
+ flags = FIELD_DP32(flags, TBFLAG_A64, TBID, tbid);
+
+ if (cpu_isar_feature(aa64_sve, cpu)) {
+ int sve_el = sve_exception_el(env, el);
+ uint32_t zcr_len;
+
+ /* If SVE is disabled, but FP is enabled,
+ * then the effective len is 0.
*/
- flags = FIELD_DP32(flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED, 1);
+ if (sve_el != 0 && fp_el == 0) {
+ zcr_len = 0;
+ } else {
+ zcr_len = sve_zcr_len_for_el(env, el);
+ }
+ flags = FIELD_DP32(flags, TBFLAG_A64, SVEEXC_EL, sve_el);
+ flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
}
- if (arm_feature(env, ARM_FEATURE_M)) {
- bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
-
- if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
- flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
+ if (el == 0) {
+ /* FIXME: ARMv8.1-VHE S2 translation regime. */
+ sctlr = env->cp15.sctlr_el[1];
+ } else {
+ sctlr = env->cp15.sctlr_el[el];
+ }
+ if (cpu_isar_feature(aa64_pauth, cpu)) {
+ /*
+ * In order to save space in flags, we record only whether
+ * pauth is "inactive", meaning all insns are implemented as
+ * a nop, or "active" when some action must be performed.
+ * The decision of which action to take is left to a helper.
+ */
+ if (sctlr & (SCTLR_EnIA | SCTLR_EnIB | SCTLR_EnDA | SCTLR_EnDB)) {
+ flags = FIELD_DP32(flags, TBFLAG_A64, PAUTH_ACTIVE, 1);
}
}
- *pflags = flags;
+ if (cpu_isar_feature(aa64_bti, cpu)) {
+ /* Note that SCTLR_EL[23].BT == SCTLR_BT1. */
+ if (sctlr & (el == 0 ? SCTLR_BT0 : SCTLR_BT1)) {
+ flags = FIELD_DP32(flags, TBFLAG_A64, BT, 1);
+ }
+ }
+
+ return common_hflags(env, el, mmu_idx, fp_el, flags);
+}
+
+void arm_rebuild_hflags(CPUARMState *env)
+{
+ int el = arm_current_el(env);
+ env->hflags = (is_a64(env)
+ ? rebuild_hflags_a64(env, el)
+ : rebuild_hflags_a32(env, el));
+}
+
+void HELPER(rebuild_hflags_a32)(CPUARMState *env, uint32_t el)
+{
+ tcg_debug_assert(!is_a64(env));
+ env->hflags = rebuild_hflags_a32(env, el);
+}
+
+void HELPER(rebuild_hflags_a64)(CPUARMState *env, uint32_t el)
+{
+ tcg_debug_assert(is_a64(env));
+ env->hflags = rebuild_hflags_a64(env, el);
+}
+
+void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
+ target_ulong *cs_base, uint32_t *pflags)
+{
+ int current_el = arm_current_el(env);
+ uint32_t flags;
+ uint32_t pstate_for_ss;
+
*cs_base = 0;
+ if (is_a64(env)) {
+ *pc = env->pc;
+ flags = rebuild_hflags_a64(env, current_el);
+ flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
+ pstate_for_ss = env->pstate;
+ } else {
+ *pc = env->regs[15];
+ flags = rebuild_hflags_a32(env, current_el);
+ flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
+ flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
+ /* Note that XSCALE_CPAR shares bits with VECSTRIDE */
+ if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+ flags = FIELD_DP32(flags, TBFLAG_A32, XSCALE_CPAR,
+ env->cp15.c15_cpar);
+ } else {
+ flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
+ flags = FIELD_DP32(flags, TBFLAG_A32, VECSTRIDE,
+ env->vfp.vec_stride);
+ }
+ if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
+ || arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
+ flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
+ }
+
+ /* TODO: Perhaps cache these bits too? */
+ if (arm_feature(env, ARM_FEATURE_M)) {
+ if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
+ FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S)
+ != env->v7m.secure) {
+ flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
+ }
+
+ if ((env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
+ (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
+ (env->v7m.secure &&
+ !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
+ /*
+ * ASPEN is set, but FPCA/SFPA indicate that there is no
+ * active FP context; we must create a new FP context
+ * before executing any FP insn.
+ */
+ flags = FIELD_DP32(flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED, 1);
+ }
+
+ bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+ if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
+ flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
+ }
+ }
+
+ pstate_for_ss = env->uncached_cpsr;
+ }
+
+ /*
+ * The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
+ * states defined in the ARM ARM for software singlestep:
+ * SS_ACTIVE PSTATE.SS State
+ * 0 x Inactive (the TB flag for SS is always 0)
+ * 1 0 Active-pending
+ * 1 1 Active-not-pending
+ * SS_ACTIVE is set in hflags; PSTATE_SS is computed every TB.
+ */
+ if (FIELD_EX32(flags, TBFLAG_ANY, SS_ACTIVE)
+ && (pstate_for_ss & PSTATE_SS)) {
+ flags = FIELD_DP32(flags, TBFLAG_ANY, PSTATE_SS, 1);
+ }
+
+ *pflags = flags;
}
#ifdef TARGET_AARCH64
--
2.17.1
* [Qemu-devel] [PATCH v4 2/3] target/arm: Rebuild hflags at EL changes and MSR writes
2019-08-06 0:05 [Qemu-devel] [PATCH v4 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state Richard Henderson
2019-08-06 0:05 ` [Qemu-devel] [PATCH v4 1/3] target/arm: Split out recompute_hflags et al Richard Henderson
@ 2019-08-06 0:05 ` Richard Henderson
2019-08-06 0:05 ` [Qemu-devel] [PATCH v4 3/3] target/arm: Rely on hflags correct in cpu_get_tb_cpu_state Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2019-08-06 0:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
Now setting, but not relying upon, env->hflags.
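The pattern at each call site is the same; roughly (a sketch only, with
illustrative *_sketch names, using the functions and helpers introduced
in the previous patch -- the real hunks are below):

/* System-emulation C code, e.g. exception entry and return: once the
 * new EL and register width are known, refresh the cached flags. */
static void refresh_hflags_sketch(CPUARMState *env, int new_el)
{
    if (is_a64(env)) {
        env->hflags = rebuild_hflags_a64(env, new_el);
    } else {
        env->hflags = rebuild_hflags_a32(env, new_el);
    }
}

/* Translator code, e.g. after a coprocessor register write in
 * disas_coproc_insn: emit a call to the helper, then end the TB so the
 * next TB is looked up with the new flags. */
static void gen_refresh_hflags_sketch(DisasContext *s)
{
    TCGv_i32 tcg_el = tcg_const_i32(s->current_el);

    gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
    tcg_temp_free_i32(tcg_el);
}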
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Fixed partial conversion to assignment to env->hflags.
---
linux-user/syscall.c | 1 +
target/arm/cpu.c | 1 +
target/arm/helper-a64.c | 3 +++
target/arm/helper.c | 2 ++
target/arm/machine.c | 1 +
target/arm/op_helper.c | 1 +
target/arm/translate-a64.c | 6 +++++-
target/arm/translate.c | 14 ++++++++++++--
8 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 8367cb138d..55d5fdadf3 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9979,6 +9979,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
aarch64_sve_narrow_vq(env, vq);
}
env->vfp.zcr_el[1] = vq - 1;
+ arm_rebuild_hflags(env);
ret = vq * 16;
}
return ret;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index ec2ab95dbe..995f4ea355 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -406,6 +406,7 @@ static void arm_cpu_reset(CPUState *s)
hw_breakpoint_update_all(cpu);
hw_watchpoint_update_all(cpu);
+ arm_rebuild_hflags(env);
}
bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 060699b901..3bc364ebb7 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -1025,6 +1025,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
} else {
env->regs[15] = new_pc & ~0x3;
}
+ env->hflags = rebuild_hflags_a32(env, new_el);
qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
"AArch32 EL%d PC 0x%" PRIx32 "\n",
cur_el, new_el, env->regs[15]);
@@ -1036,10 +1037,12 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
}
aarch64_restore_sp(env, new_el);
env->pc = new_pc;
+ env->hflags = rebuild_hflags_a64(env, new_el);
qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
"AArch64 EL%d PC 0x%" PRIx64 "\n",
cur_el, new_el, env->pc);
}
+
/*
* Note that cur_el can never be 0. If new_el is 0, then
* el0_a64 is return_to_aa64, else el0_a64 is ignored.
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 43b7c41f11..9b07350cfe 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -7905,6 +7905,7 @@ static void take_aarch32_exception(CPUARMState *env, int new_mode,
env->regs[14] = env->regs[15] + offset;
}
env->regs[15] = newpc;
+ env->hflags = rebuild_hflags_a32(env, arm_current_el(env));
}
static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
@@ -8251,6 +8252,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
pstate_write(env, PSTATE_DAIF | new_mode);
env->aarch64 = 1;
+ env->hflags = rebuild_hflags_a64(env, new_el);
aarch64_restore_sp(env, new_el);
env->pc = addr;
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 3fd319a309..838d154a3c 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -758,6 +758,7 @@ static int cpu_post_load(void *opaque, int version_id)
if (!kvm_enabled()) {
pmu_op_finish(&cpu->env);
}
+ arm_rebuild_hflags(&cpu->env);
return 0;
}
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 5e1625a1c8..6e6613b8a8 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -420,6 +420,7 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val)
*/
env->regs[15] &= (env->thumb ? ~1 : ~3);
+ env->hflags = rebuild_hflags_a32(env, arm_current_el(env));
qemu_mutex_lock_iothread();
arm_call_el_change_hook(env_archcpu(env));
qemu_mutex_unlock_iothread();
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index d3231477a2..f8b5debf82 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1799,11 +1799,15 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
/* I/O operations must end the TB here (whether read or write) */
gen_io_end();
s->base.is_jmp = DISAS_UPDATE;
- } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
+ }
+ if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
/* We default to ending the TB on a coprocessor register write,
* but allow this to be suppressed by the register definition
* (usually only necessary to work around guest bugs).
*/
+ TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
+ gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
+ tcg_temp_free_i32(tcg_el);
s->base.is_jmp = DISAS_UPDATE;
}
}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 7853462b21..cd14bbebf1 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -7164,6 +7164,8 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn)
ri = get_arm_cp_reginfo(s->cp_regs,
ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
if (ri) {
+ bool need_exit_tb;
+
/* Check access permissions */
if (!cp_access_ok(s->current_el, ri, isread)) {
return 1;
@@ -7336,15 +7338,23 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn)
}
}
+ need_exit_tb = false;
if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
/* I/O operations must end the TB here (whether read or write) */
gen_io_end();
- gen_lookup_tb(s);
- } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
+ need_exit_tb = true;
+ }
+ if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
/* We default to ending the TB on a coprocessor register write,
* but allow this to be suppressed by the register definition
* (usually only necessary to work around guest bugs).
*/
+ TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
+ gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
+ tcg_temp_free_i32(tcg_el);
+ need_exit_tb = true;
+ }
+ if (need_exit_tb) {
gen_lookup_tb(s);
}
--
2.17.1
* [Qemu-devel] [PATCH v4 3/3] target/arm: Rely on hflags correct in cpu_get_tb_cpu_state
2019-08-06 0:05 [Qemu-devel] [PATCH v4 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state Richard Henderson
2019-08-06 0:05 ` [Qemu-devel] [PATCH v4 1/3] target/arm: Split out recompute_hflags et al Richard Henderson
2019-08-06 0:05 ` [Qemu-devel] [PATCH v4 2/3] target/arm: Rebuild hflags at EL changes and MSR writes Richard Henderson
@ 2019-08-06 0:05 ` Richard Henderson
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2019-08-06 0:05 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, qemu-arm
This is the payoff.
From perf record -g data of an Ubuntu 18 boot and shutdown:
BEFORE:
-   23.02%     2.82%  qemu-system-aar  [.] helper_lookup_tb_ptr
   - 20.22% helper_lookup_tb_ptr
      + 10.05% tb_htable_lookup
      - 9.13% cpu_get_tb_cpu_state
           3.20% aa64_va_parameters_both
           0.55% fp_exception_el
-   11.66%     4.74%  qemu-system-aar  [.] cpu_get_tb_cpu_state
   - 6.96% cpu_get_tb_cpu_state
        3.63% aa64_va_parameters_both
        0.60% fp_exception_el
        0.53% sve_exception_el
AFTER:
-   16.40%     3.40%  qemu-system-aar  [.] helper_lookup_tb_ptr
   - 13.03% helper_lookup_tb_ptr
      + 11.19% tb_htable_lookup
        0.55% cpu_get_tb_cpu_state
     0.98%  0.71%  qemu-system-aar  [.] cpu_get_tb_cpu_state
     0.87%  0.24%  qemu-system-aar  [.] rebuild_hflags_a64
Before, helper_lookup_tb_ptr is the second-hottest function in the
application, consuming almost a quarter of the runtime. Within the
entire execution, cpu_get_tb_cpu_state consumes about 12%.
After, helper_lookup_tb_ptr has dropped to the fourth-hottest function,
consuming about a sixth of the runtime. Within the entire execution,
cpu_get_tb_cpu_state has dropped below 1%, and the supporting function
that rebuilds hflags also consumes about 1%.
Assertions are retained for --enable-debug-tcg.
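Concretely, the check retained under --enable-debug-tcg amounts to the
following sketch (compare the cpu_get_tb_cpu_state hunk below; the
_sketch name is illustrative):

static void assert_hflags_current_sketch(CPUARMState *env)
{
#ifdef CONFIG_DEBUG_TCG
    int el = arm_current_el(env);
    uint32_t check_flags = is_a64(env) ? rebuild_hflags_a64(env, el)
                                       : rebuild_hflags_a32(env, el);

    /* The cached value must always equal a from-scratch rebuild. */
    assert(env->hflags == check_flags);
#endif
}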
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Retain asserts for future debugging.
---
target/arm/helper.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 9b07350cfe..abb8cc52dd 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11155,19 +11155,29 @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, uint32_t el)
void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
target_ulong *cs_base, uint32_t *pflags)
{
- int current_el = arm_current_el(env);
- uint32_t flags;
+ uint32_t flags = env->hflags;
uint32_t pstate_for_ss;
+#ifdef CONFIG_DEBUG_TCG
+ {
+ int el = arm_current_el(env);
+ uint32_t check_flags;
+ if (is_a64(env)) {
+ check_flags = rebuild_hflags_a64(env, el);
+ } else {
+ check_flags = rebuild_hflags_a32(env, el);
+ }
+ assert(flags == check_flags);
+ }
+#endif
+
*cs_base = 0;
- if (is_a64(env)) {
+ if (FIELD_EX32(flags, TBFLAG_ANY, AARCH64_STATE)) {
*pc = env->pc;
- flags = rebuild_hflags_a64(env, current_el);
flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
pstate_for_ss = env->pstate;
} else {
*pc = env->regs[15];
- flags = rebuild_hflags_a32(env, current_el);
flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
/* Note that XSCALE_CPAR shares bits with VECSTRIDE */
--
2.17.1