* [PATCH 1/4] target/arm: Flush only required tlbs for TCR_EL[12]
2023-02-02 7:52 [PATCH 0/4] target/arm: Cache ARMVAParameters Richard Henderson
@ 2023-02-02 7:52 ` Richard Henderson
2023-02-02 7:52 ` [PATCH 2/4] target/arm: Store tbi for both insns and data in ARMVAParameters Richard Henderson
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2023-02-02 7:52 UTC (permalink / raw)
To: qemu-devel; +Cc: anders.roxell, qemu-arm
The ASID only affects stage1 of the relevant regime.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.c | 29 +++++++++++++++++++++++------
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 72b37b7cf1..8ad9a667f1 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4054,13 +4054,30 @@ static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
raw_write(env, ri, value);
}
-static void vmsa_tcr_el12_write(CPUARMState *env, const ARMCPRegInfo *ri,
+static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- ARMCPU *cpu = env_archcpu(env);
+ CPUState *cs = env_cpu(env);
- /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
- tlb_flush(CPU(cpu));
+ /* For AA64, the A1 or AS bits could result in a change of ASID. */
+ tlb_flush_by_mmuidx(cs, (ARMMMUIdxBit_E10_1 |
+ ARMMMUIdxBit_E10_1_PAN |
+ ARMMMUIdxBit_E10_0));
+ raw_write(env, ri, value);
+}
+
+static void vmsa_tcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ CPUState *cs = env_cpu(env);
+
+ /*
+ * For AA64, the A1 or AS bits could result in a change of ASID.
+ * This only affects the EL2&0 regime, not the EL2 regime.
+ */
+ tlb_flush_by_mmuidx(cs, (ARMMMUIdxBit_E20_2 |
+ ARMMMUIdxBit_E20_2_PAN |
+ ARMMMUIdxBit_E20_0));
raw_write(env, ri, value);
}
@@ -4151,7 +4168,7 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
{ .name = "TCR_EL1", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
.access = PL1_RW, .accessfn = access_tvm_trvm,
- .writefn = vmsa_tcr_el12_write,
+ .writefn = vmsa_tcr_el1_write,
.raw_writefn = raw_write,
.resetvalue = 0,
.fieldoffset = offsetof(CPUARMState, cp15.tcr_el[1]) },
@@ -5894,7 +5911,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.resetvalue = 0 },
{ .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2,
- .access = PL2_RW, .writefn = vmsa_tcr_el12_write,
+ .access = PL2_RW, .writefn = vmsa_tcr_el2_write,
.fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) },
{ .name = "VTCR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/4] target/arm: Store tbi for both insns and data in ARMVAParameters
2023-02-02 7:52 [PATCH 0/4] target/arm: Cache ARMVAParameters Richard Henderson
2023-02-02 7:52 ` [PATCH 1/4] target/arm: Flush only required tlbs for TCR_EL[12] Richard Henderson
@ 2023-02-02 7:52 ` Richard Henderson
2023-02-16 7:35 ` Philippe Mathieu-Daudé
2023-02-02 7:52 ` [PATCH 3/4] target/arm: Use FIELD for ARMVAParameters Richard Henderson
` (3 subsequent siblings)
5 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2023-02-02 7:52 UTC (permalink / raw)
To: qemu-devel; +Cc: anders.roxell, qemu-arm
This is slightly more work on the consumer side, but means
we will be able to compute this once for multiple uses.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/internals.h | 5 +++--
target/arm/helper.c | 18 +++++++++---------
target/arm/pauth_helper.c | 29 ++++++++++++++++-------------
target/arm/ptw.c | 6 +++---
4 files changed, 31 insertions(+), 27 deletions(-)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index d9555309df..73b37478bf 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1072,7 +1072,8 @@ typedef struct ARMVAParameters {
unsigned ps : 3;
unsigned sh : 2;
unsigned select : 1;
- bool tbi : 1;
+ bool tbid : 1; /* final TBI for data, not the TBID field */
+ bool tbii : 1; /* final TBI for insns */
bool epd : 1;
bool hpd : 1;
bool tsz_oob : 1; /* tsz has been clamped to legal range */
@@ -1083,7 +1084,7 @@ typedef struct ARMVAParameters {
} ARMVAParameters;
ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
- ARMMMUIdx mmu_idx, bool data);
+ ARMMMUIdx mmu_idx);
int aa64_va_parameter_tbi(uint64_t tcr, ARMMMUIdx mmu_idx);
int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx);
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 8ad9a667f1..fda0b9da75 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4874,7 +4874,7 @@ static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx,
unsigned int page_size_granule, page_shift, num, scale, exponent;
/* Extract one bit to represent the va selector in use. */
uint64_t select = sextract64(value, 36, 1);
- ARMVAParameters param = aa64_va_parameters(env, select, mmuidx, true);
+ ARMVAParameters param = aa64_va_parameters(env, select, mmuidx);
TLBIRange ret = { };
ARMGranuleSize gran;
@@ -11040,11 +11040,11 @@ static ARMGranuleSize sanitize_gran_size(ARMCPU *cpu, ARMGranuleSize gran,
}
ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
- ARMMMUIdx mmu_idx, bool data)
+ ARMMMUIdx mmu_idx)
{
uint64_t tcr = regime_tcr(env, mmu_idx);
bool epd, hpd, tsz_oob, ds, ha, hd;
- int select, tsz, tbi, max_tsz, min_tsz, ps, sh;
+ int select, tsz, tbii, tbid, max_tsz, min_tsz, ps, sh;
ARMGranuleSize gran;
ARMCPU *cpu = env_archcpu(env);
bool stage2 = regime_is_stage2(mmu_idx);
@@ -11147,18 +11147,18 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
}
/* Present TBI as a composite with TBID. */
- tbi = aa64_va_parameter_tbi(tcr, mmu_idx);
- if (!data) {
- tbi &= ~aa64_va_parameter_tbid(tcr, mmu_idx);
- }
- tbi = (tbi >> select) & 1;
+ tbid = aa64_va_parameter_tbi(tcr, mmu_idx);
+ tbii = tbid & ~aa64_va_parameter_tbid(tcr, mmu_idx);
+ tbid = (tbid >> select) & 1;
+ tbii = (tbii >> select) & 1;
return (ARMVAParameters) {
.tsz = tsz,
.ps = ps,
.sh = sh,
.select = select,
- .tbi = tbi,
+ .tbid = tbid,
+ .tbii = tbii,
.epd = epd,
.hpd = hpd,
.tsz_oob = tsz_oob,
diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c
index d0483bf051..bfed6f9722 100644
--- a/target/arm/pauth_helper.c
+++ b/target/arm/pauth_helper.c
@@ -293,19 +293,20 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
ARMPACKey *key, bool data)
{
ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
- ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data);
+ ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx);
uint64_t pac, ext_ptr, ext, test;
int bot_bit, top_bit;
+ bool tbi = data ? param.tbid : param.tbii;
/* If tagged pointers are in use, use ptr<55>, otherwise ptr<63>. */
- if (param.tbi) {
+ if (tbi) {
ext = sextract64(ptr, 55, 1);
} else {
ext = sextract64(ptr, 63, 1);
}
/* Build a pointer with known good extension bits. */
- top_bit = 64 - 8 * param.tbi;
+ top_bit = 64 - 8 * tbi;
bot_bit = 64 - param.tsz;
ext_ptr = deposit64(ptr, bot_bit, top_bit - bot_bit, ext);
@@ -328,7 +329,7 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
* Preserve the determination between upper and lower at bit 55,
* and insert pointer authentication code.
*/
- if (param.tbi) {
+ if (tbi) {
ptr &= ~MAKE_64BIT_MASK(bot_bit, 55 - bot_bit + 1);
pac &= MAKE_64BIT_MASK(bot_bit, 54 - bot_bit + 1);
} else {
@@ -339,12 +340,12 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
return pac | ext | ptr;
}
-static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param)
+static uint64_t pauth_original_ptr(uint64_t ptr, int tsz, bool tbi)
{
/* Note that bit 55 is used whether or not the regime has 2 ranges. */
uint64_t extfield = sextract64(ptr, 55, 1);
- int bot_pac_bit = 64 - param.tsz;
- int top_pac_bit = 64 - 8 * param.tbi;
+ int bot_pac_bit = 64 - tsz;
+ int top_pac_bit = 64 - 8 * tbi;
return deposit64(ptr, bot_pac_bit, top_pac_bit - bot_pac_bit, extfield);
}
@@ -353,19 +354,20 @@ static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
ARMPACKey *key, bool data, int keynumber)
{
ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
- ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data);
+ ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx);
+ bool tbi = data ? param.tbid : param.tbii;
int bot_bit, top_bit;
uint64_t pac, orig_ptr, test;
- orig_ptr = pauth_original_ptr(ptr, param);
+ orig_ptr = pauth_original_ptr(ptr, param.tsz, tbi);
pac = pauth_computepac(env, orig_ptr, modifier, *key);
bot_bit = 64 - param.tsz;
- top_bit = 64 - 8 * param.tbi;
+ top_bit = 64 - 8 * tbi;
test = (pac ^ ptr) & ~MAKE_64BIT_MASK(55, 1);
if (unlikely(extract64(test, bot_bit, top_bit - bot_bit))) {
int error_code = (keynumber << 1) | (keynumber ^ 1);
- if (param.tbi) {
+ if (tbi) {
return deposit64(orig_ptr, 53, 2, error_code);
} else {
return deposit64(orig_ptr, 61, 2, error_code);
@@ -377,9 +379,10 @@ static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
static uint64_t pauth_strip(CPUARMState *env, uint64_t ptr, bool data)
{
ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
- ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data);
+ ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx);
+ bool tbi = data ? param.tbid : param.tbii;
- return pauth_original_ptr(ptr, param);
+ return pauth_original_ptr(ptr, param.tsz, tbi);
}
static G_NORETURN
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 57f3615a66..a19d714985 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -1193,8 +1193,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
if (aarch64) {
int ps;
- param = aa64_va_parameters(env, address, mmu_idx,
- access_type != MMU_INST_FETCH);
+ param = aa64_va_parameters(env, address, mmu_idx);
level = 0;
/*
@@ -1210,7 +1209,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
goto do_translation_fault;
}
- addrsize = 64 - 8 * param.tbi;
+ addrsize = access_type == MMU_INST_FETCH ? param.tbii : param.tbid;
+ addrsize = 64 - 8 * addrsize;
inputsize = 64 - param.tsz;
/*
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/4] target/arm: Use FIELD for ARMVAParameters
2023-02-02 7:52 [PATCH 0/4] target/arm: Cache ARMVAParameters Richard Henderson
2023-02-02 7:52 ` [PATCH 1/4] target/arm: Flush only required tlbs for TCR_EL[12] Richard Henderson
2023-02-02 7:52 ` [PATCH 2/4] target/arm: Store tbi for both insns and data in ARMVAParameters Richard Henderson
@ 2023-02-02 7:52 ` Richard Henderson
2023-02-02 7:52 ` [PATCH 4/4] target/arm: Cache ARMVAParameters Richard Henderson
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2023-02-02 7:52 UTC (permalink / raw)
To: qemu-devel; +Cc: anders.roxell, qemu-arm
Use hw/registerfields.h instead of bitfields for ARMVAParameters.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 19 ++++++++++++++
target/arm/internals.h | 20 ---------------
target/arm/helper.c | 36 +++++++++++++-------------
target/arm/pauth_helper.c | 20 +++++++++------
target/arm/ptw.c | 53 +++++++++++++++++++++------------------
5 files changed, 79 insertions(+), 69 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 8cf70693be..e961afe88a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -225,6 +225,25 @@ typedef struct CPUARMTBFlags {
target_ulong flags2;
} CPUARMTBFlags;
+/*
+ * Parameters of a given virtual address, as extracted from the
+ * translation control register (TCR) for a given regime.
+ */
+typedef uint32_t ARMVAParameters;
+ FIELD(ARMVAP, SELECT, 0, 1)
+ FIELD(ARMVAP, TSZ, 1, 8)
+ FIELD(ARMVAP, TSZ_OOB, 9, 1) /* tsz has been clamped to legal range */
+ FIELD(ARMVAP, PS, 10, 3)
+ FIELD(ARMVAP, SH, 13, 2)
+ FIELD(ARMVAP, GRAN, 15, 2)
+ FIELD(ARMVAP, TBID, 17, 1) /* final TBI for data, not TCR TBID field */
+ FIELD(ARMVAP, TBII, 18, 1) /* final TBI for insns */
+ FIELD(ARMVAP, EPD, 19, 1)
+ FIELD(ARMVAP, HPD, 20, 1)
+ FIELD(ARMVAP, DS, 21, 1)
+ FIELD(ARMVAP, HA, 22, 1)
+ FIELD(ARMVAP, HD, 23, 1)
+
typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
typedef struct CPUArchState {
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 73b37478bf..2c24c2f39f 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1063,26 +1063,6 @@ static inline int arm_granule_bits(ARMGranuleSize gran)
}
}
-/*
- * Parameters of a given virtual address, as extracted from the
- * translation control register (TCR) for a given regime.
- */
-typedef struct ARMVAParameters {
- unsigned tsz : 8;
- unsigned ps : 3;
- unsigned sh : 2;
- unsigned select : 1;
- bool tbid : 1; /* final TBI for data, not the TBID field */
- bool tbii : 1; /* final TBI for insns */
- bool epd : 1;
- bool hpd : 1;
- bool tsz_oob : 1; /* tsz has been clamped to legal range */
- bool ds : 1;
- bool ha : 1;
- bool hd : 1;
- ARMGranuleSize gran : 2;
-} ARMVAParameters;
-
ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMMMUIdx mmu_idx);
diff --git a/target/arm/helper.c b/target/arm/helper.c
index fda0b9da75..531a4bebb3 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4882,7 +4882,7 @@ static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx,
gran = tlbi_range_tg_to_gran_size(page_size_granule);
/* The granule encoded in value must match the granule in use. */
- if (gran != param.gran) {
+ if (gran != FIELD_EX32(param, ARMVAP, GRAN)) {
qemu_log_mask(LOG_GUEST_ERROR, "Invalid tlbi page size granule %d\n",
page_size_granule);
return ret;
@@ -4895,12 +4895,12 @@ static TLBIRange tlbi_aa64_get_range(CPUARMState *env, ARMMMUIdx mmuidx,
ret.length = (num + 1) << (exponent + page_shift);
- if (param.select) {
+ if (FIELD_EX32(param, ARMVAP, SELECT)) {
ret.base = sextract64(value, 0, 37);
} else {
ret.base = extract64(value, 0, 37);
}
- if (param.ds) {
+ if (FIELD_EX32(param, ARMVAP, DS)) {
/*
* With DS=1, BaseADDR is always shifted 16 so that it is able
* to address all 52 va bits. The input address is perforce
@@ -11048,6 +11048,7 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMGranuleSize gran;
ARMCPU *cpu = env_archcpu(env);
bool stage2 = regime_is_stage2(mmu_idx);
+ ARMVAParameters r;
if (!regime_has_2_ranges(mmu_idx)) {
select = 0;
@@ -11152,21 +11153,20 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
tbid = (tbid >> select) & 1;
tbii = (tbii >> select) & 1;
- return (ARMVAParameters) {
- .tsz = tsz,
- .ps = ps,
- .sh = sh,
- .select = select,
- .tbid = tbid,
- .tbii = tbii,
- .epd = epd,
- .hpd = hpd,
- .tsz_oob = tsz_oob,
- .ds = ds,
- .ha = ha,
- .hd = ha && hd,
- .gran = gran,
- };
+ r = FIELD_DP32(0, ARMVAP, SELECT, select);
+ r = FIELD_DP32(r, ARMVAP, TSZ, tsz);
+ r = FIELD_DP32(r, ARMVAP, TSZ_OOB, tsz_oob);
+ r = FIELD_DP32(r, ARMVAP, PS, ps);
+ r = FIELD_DP32(r, ARMVAP, SH, sh);
+ r = FIELD_DP32(r, ARMVAP, GRAN, gran);
+ r = FIELD_DP32(r, ARMVAP, TBID, tbid);
+ r = FIELD_DP32(r, ARMVAP, TBII, tbii);
+ r = FIELD_DP32(r, ARMVAP, EPD, epd);
+ r = FIELD_DP32(r, ARMVAP, HPD, hpd);
+ r = FIELD_DP32(r, ARMVAP, DS, ds);
+ r = FIELD_DP32(r, ARMVAP, HA, ha);
+ r = FIELD_DP32(r, ARMVAP, HD, ha && hd);
+ return r;
}
/*
diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c
index bfed6f9722..1dffcef6c3 100644
--- a/target/arm/pauth_helper.c
+++ b/target/arm/pauth_helper.c
@@ -296,7 +296,9 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx);
uint64_t pac, ext_ptr, ext, test;
int bot_bit, top_bit;
- bool tbi = data ? param.tbid : param.tbii;
+ bool tbi = (data ? FIELD_EX32(param, ARMVAP, TBID)
+ : FIELD_EX32(param, ARMVAP, TBII));
+ int tsz = FIELD_EX32(param, ARMVAP, TSZ);
/* If tagged pointers are in use, use ptr<55>, otherwise ptr<63>. */
if (tbi) {
@@ -307,7 +309,7 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier,
/* Build a pointer with known good extension bits. */
top_bit = 64 - 8 * tbi;
- bot_bit = 64 - param.tsz;
+ bot_bit = 64 - tsz;
ext_ptr = deposit64(ptr, bot_bit, top_bit - bot_bit, ext);
pac = pauth_computepac(env, ext_ptr, modifier, *key);
@@ -355,13 +357,15 @@ static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier,
{
ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx);
- bool tbi = data ? param.tbid : param.tbii;
+ bool tbi = (data ? FIELD_EX32(param, ARMVAP, TBID)
+ : FIELD_EX32(param, ARMVAP, TBII));
+ int tsz = FIELD_EX32(param, ARMVAP, TSZ);
int bot_bit, top_bit;
uint64_t pac, orig_ptr, test;
- orig_ptr = pauth_original_ptr(ptr, param.tsz, tbi);
+ orig_ptr = pauth_original_ptr(ptr, tsz, tbi);
pac = pauth_computepac(env, orig_ptr, modifier, *key);
- bot_bit = 64 - param.tsz;
+ bot_bit = 64 - tsz;
top_bit = 64 - 8 * tbi;
test = (pac ^ ptr) & ~MAKE_64BIT_MASK(55, 1);
@@ -380,9 +384,11 @@ static uint64_t pauth_strip(CPUARMState *env, uint64_t ptr, bool data)
{
ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env);
ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx);
- bool tbi = data ? param.tbid : param.tbii;
+ bool tbi = (data ? FIELD_EX32(param, ARMVAP, TBID)
+ : FIELD_EX32(param, ARMVAP, TBII));
+ int tsz = FIELD_EX32(param, ARMVAP, TSZ);
- return pauth_original_ptr(ptr, param.tsz, tbi);
+ return pauth_original_ptr(ptr, tsz, tbi);
}
static G_NORETURN
diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index a19d714985..ec3b18e981 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -1014,6 +1014,7 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
{
uint64_t tcr = regime_tcr(env, mmu_idx);
uint32_t el = regime_el(env, mmu_idx);
+ ARMVAParameters r;
int select, tsz;
bool epd, hpd;
@@ -1065,12 +1066,11 @@ static ARMVAParameters aa32_va_parameters(CPUARMState *env, uint32_t va,
hpd &= extract32(tcr, 6, 1);
}
- return (ARMVAParameters) {
- .tsz = tsz,
- .select = select,
- .epd = epd,
- .hpd = hpd,
- };
+ r = FIELD_DP32(0, ARMVAP, SELECT, select);
+ r = FIELD_DP32(r, ARMVAP, TSZ, tsz);
+ r = FIELD_DP32(r, ARMVAP, EPD, epd);
+ r = FIELD_DP32(r, ARMVAP, HPD, hpd);
+ return r;
}
/*
@@ -1205,13 +1205,17 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* With FEAT_LVA, fault on less than minimum becomes required,
* so our choice is to always raise the fault.
*/
- if (param.tsz_oob) {
+ if (FIELD_EX32(param, ARMVAP, TSZ_OOB)) {
goto do_translation_fault;
}
- addrsize = access_type == MMU_INST_FETCH ? param.tbii : param.tbid;
+ if (access_type == MMU_INST_FETCH) {
+ addrsize = FIELD_EX32(param, ARMVAP, TBII);
+ } else {
+ addrsize = FIELD_EX32(param, ARMVAP, TBID);
+ }
addrsize = 64 - 8 * addrsize;
- inputsize = 64 - param.tsz;
+ inputsize = 64 - FIELD_EX32(param, ARMVAP, TSZ);
/*
* Bound PS by PARANGE to find the effective output address size.
@@ -1219,7 +1223,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* supported mappings can be considered an implementation error.
*/
ps = FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE);
- ps = MIN(ps, param.ps);
+ ps = MIN(ps, FIELD_EX32(param, ARMVAP, PS));
assert(ps < ARRAY_SIZE(pamax_map));
outputsize = pamax_map[ps];
@@ -1227,14 +1231,15 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* With LPA2, the effective output address (OA) size is at most 48 bits
* unless TCR.DS == 1
*/
- if (!param.ds && param.gran != Gran64K) {
+ if (!FIELD_EX32(param, ARMVAP, DS) &&
+ FIELD_EX32(param, ARMVAP, GRAN) != Gran64K) {
outputsize = MIN(outputsize, 48);
}
} else {
param = aa32_va_parameters(env, address, mmu_idx);
level = 1;
addrsize = (mmu_idx == ARMMMUIdx_Stage2 ? 40 : 32);
- inputsize = addrsize - param.tsz;
+ inputsize = addrsize - FIELD_EX32(param, ARMVAP, TSZ);
outputsize = 40;
}
@@ -1250,13 +1255,13 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
if (inputsize < addrsize) {
target_ulong top_bits = sextract64(address, inputsize,
addrsize - inputsize);
- if (-top_bits != param.select) {
+ if (-top_bits != FIELD_EX32(param, ARMVAP, SELECT)) {
/* The gap between the two regions is a Translation fault */
goto do_translation_fault;
}
}
- stride = arm_granule_bits(param.gran) - 3;
+ stride = arm_granule_bits(FIELD_EX32(param, ARMVAP, GRAN)) - 3;
/*
* Note that QEMU ignores shareability and cacheability attributes,
@@ -1266,14 +1271,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* implement any ASID-like capability so we can ignore it (instead
* we will always flush the TLB any time the ASID is changed).
*/
- ttbr = regime_ttbr(env, mmu_idx, param.select);
+ ttbr = regime_ttbr(env, mmu_idx, FIELD_EX32(param, ARMVAP, SELECT));
/*
* Here we should have set up all the parameters for the translation:
* inputsize, ttbr, epd, stride, tbi
*/
- if (param.epd) {
+ if (FIELD_EX32(param, ARMVAP, EPD)) {
/*
* Translation table walk disabled => Translation fault on TLB miss
* Note: This is always 0 on 64-bit EL2 and EL3.
@@ -1306,7 +1311,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
bool ok;
/* SL2 is RES0 unless DS=1 & 4kb granule. */
- if (param.ds && stride == 9 && sl2) {
+ if (FIELD_EX32(param, ARMVAP, DS) && stride == 9 && sl2) {
if (sl0 != 0) {
level = 0;
goto do_translation_fault;
@@ -1368,7 +1373,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* For AArch64, the address field goes up to bit 47, or 49 with FEAT_LPA2;
* the highest bits of a 52-bit output are placed elsewhere.
*/
- if (param.ds) {
+ if (FIELD_EX32(param, ARMVAP, DS)) {
descaddrmask = MAKE_64BIT_MASK(0, 50);
} else if (arm_feature(env, ARM_FEATURE_V8)) {
descaddrmask = MAKE_64BIT_MASK(0, 48);
@@ -1425,7 +1430,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* raise AddressSizeFault.
*/
if (outputsize > 48) {
- if (param.ds) {
+ if (FIELD_EX32(param, ARMVAP, DS)) {
descaddr |= extract64(descriptor, 8, 2) << 50;
} else {
descaddr |= extract64(descriptor, 12, 4) << 48;
@@ -1470,7 +1475,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* Otherwise, pass the access fault on to software.
*/
if (!(descriptor & (1 << 10))) {
- if (param.ha) {
+ if (FIELD_EX32(param, ARMVAP, HA)) {
new_descriptor |= 1 << 10; /* AF */
} else {
fi->type = ARMFault_AccessFlag;
@@ -1484,7 +1489,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* bit for writeback. The actual write protection test may still be
* overridden by tableattrs, to be merged below.
*/
- if (param.hd
+ if (FIELD_EX32(param, ARMVAP, HD)
&& extract64(descriptor, 51, 1) /* DBM */
&& access_type == MMU_DATA_STORE) {
if (regime_is_stage2(mmu_idx)) {
@@ -1504,7 +1509,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
attrs = new_descriptor & (MAKE_64BIT_MASK(2, 10) | MAKE_64BIT_MASK(50, 14));
if (!regime_is_stage2(mmu_idx)) {
attrs |= nstable << 5; /* NS */
- if (!param.hpd) {
+ if (!FIELD_EX32(param, ARMVAP, HPD)) {
attrs |= extract64(tableattrs, 0, 2) << 53; /* XN, PXN */
/*
* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -1582,8 +1587,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, S1Translate *ptw,
* was re-purposed for output address bits. The SH attribute in
* that case comes from TCR_ELx, which we extracted earlier.
*/
- if (param.ds) {
- result->cacheattrs.shareability = param.sh;
+ if (FIELD_EX32(param, ARMVAP, DS)) {
+ result->cacheattrs.shareability = FIELD_EX32(param, ARMVAP, SH);
} else {
result->cacheattrs.shareability = extract32(attrs, 8, 2);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/4] target/arm: Cache ARMVAParameters
2023-02-02 7:52 [PATCH 0/4] target/arm: Cache ARMVAParameters Richard Henderson
` (2 preceding siblings ...)
2023-02-02 7:52 ` [PATCH 3/4] target/arm: Use FIELD for ARMVAParameters Richard Henderson
@ 2023-02-02 7:52 ` Richard Henderson
2023-02-16 6:54 ` [PATCH 0/4] " Richard Henderson
2023-02-16 7:47 ` Philippe Mathieu-Daudé
5 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2023-02-02 7:52 UTC (permalink / raw)
To: qemu-devel; +Cc: anders.roxell, qemu-arm
Cache the processed ARMVAParameters, as building them is
quite expensive, particularly when PAUTH is enabled.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 11 +++++
target/arm/helper.c | 102 ++++++++++++++++++++++++++++++++++++--------
2 files changed, 96 insertions(+), 17 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e961afe88a..4ab8a0629b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -243,6 +243,7 @@ typedef uint32_t ARMVAParameters;
FIELD(ARMVAP, DS, 21, 1)
FIELD(ARMVAP, HA, 22, 1)
FIELD(ARMVAP, HD, 23, 1)
+ FIELD(ARMVAP, INIT, 31, 1)
typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
@@ -741,6 +742,16 @@ typedef struct CPUArchState {
/* Optional fault info across tlb lookup. */
ARMMMUFaultInfo *tlb_fi;
+ /* Cached VA Parameters. */
+ struct {
+ ARMVAParameters e0[2];
+ ARMVAParameters e1[2];
+ ARMVAParameters e2[2];
+ ARMVAParameters e3;
+ ARMVAParameters stage2;
+ ARMVAParameters stage2_s;
+ } vap_cache;
+
/* Fields up to this point are cleared by a CPU reset */
struct {} end_reset_fields;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 531a4bebb3..d573a8c420 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4063,6 +4063,9 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush_by_mmuidx(cs, (ARMMMUIdxBit_E10_1 |
ARMMMUIdxBit_E10_1_PAN |
ARMMMUIdxBit_E10_0));
+ memset(&env->vap_cache.e0, 0, sizeof(env->vap_cache.e0));
+ memset(&env->vap_cache.e1, 0, sizeof(env->vap_cache.e1));
+
raw_write(env, ri, value);
}
@@ -4078,6 +4081,25 @@ static void vmsa_tcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush_by_mmuidx(cs, (ARMMMUIdxBit_E20_2 |
ARMMMUIdxBit_E20_2_PAN |
ARMMMUIdxBit_E20_0));
+ memset(&env->vap_cache.e0, 0, sizeof(env->vap_cache.e0));
+ memset(&env->vap_cache.e2, 0, sizeof(env->vap_cache.e2));
+
+ raw_write(env, ri, value);
+}
+
+static void vmsa_vtcr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ /* Bits of VSTCR_EL2 are shared with VTCR_EL2: flush both. */
+ env->vap_cache.stage2 = 0;
+ env->vap_cache.stage2_s = 0;
+ raw_write(env, ri, value);
+}
+
+static void vmsa_tcr_el3_write(CPUARMState *env, const ARMCPRegInfo *ri,
+ uint64_t value)
+{
+ env->vap_cache.e3 = 0;
raw_write(env, ri, value);
}
@@ -5552,6 +5574,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask)
{
ARMCPU *cpu = env_archcpu(env);
+ uint64_t changed;
if (arm_feature(env, ARM_FEATURE_V8)) {
valid_mask |= MAKE_64BIT_MASK(0, 34); /* ARMv8.0 */
@@ -5605,6 +5628,8 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask)
/* Clear RES0 bits. */
value &= valid_mask;
+ changed = env->cp15.hcr_el2 ^ value;
+ env->cp15.hcr_el2 = value;
/*
* These bits change the MMU setup:
@@ -5614,11 +5639,14 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask)
* HCR_DCT enables tagging on (disabled) stage1 translation
* HCR_FWB changes the interpretation of stage2 descriptor bits
*/
- if ((env->cp15.hcr_el2 ^ value) &
- (HCR_VM | HCR_PTW | HCR_DC | HCR_DCT | HCR_FWB)) {
+ if (changed & (HCR_VM | HCR_PTW | HCR_DC | HCR_DCT | HCR_FWB)) {
tlb_flush(CPU(cpu));
}
- env->cp15.hcr_el2 = value;
+ /* E2H and TGE control {E20_2, E20_0} vs {E2, E10_0} regimes. */
+ if (changed & (HCR_E2H | HCR_TGE)) {
+ memset(&env->vap_cache.e0, 0, sizeof(env->vap_cache.e0));
+ memset(&env->vap_cache.e2, 0, sizeof(env->vap_cache.e2));
+ }
/*
* Updates to VI and VF require us to update the status of
@@ -5915,13 +5943,12 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) },
{ .name = "VTCR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
- .type = ARM_CP_ALIAS,
+ .type = ARM_CP_ALIAS, .writefn = vmsa_vtcr_el2_write,
.access = PL2_RW, .accessfn = access_el3_aa32ns,
.fieldoffset = offsetoflow32(CPUARMState, cp15.vtcr_el2) },
{ .name = "VTCR_EL2", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
- .access = PL2_RW,
- /* no .writefn needed as this can't cause an ASID change */
+ .access = PL2_RW, .writefn = vmsa_vtcr_el2_write,
.fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
{ .name = "VTTBR", .state = ARM_CP_STATE_AA32,
.cp = 15, .opc1 = 6, .crm = 2,
@@ -6154,8 +6181,7 @@ static const ARMCPRegInfo el3_cp_reginfo[] = {
.fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el[3]) },
{ .name = "TCR_EL3", .state = ARM_CP_STATE_AA64,
.opc0 = 3, .opc1 = 6, .crn = 2, .crm = 0, .opc2 = 2,
- .access = PL3_RW,
- /* no .writefn needed as this can't cause an ASID change */
+ .access = PL3_RW, .writefn = vmsa_tcr_el3_write,
.resetvalue = 0,
.fieldoffset = offsetof(CPUARMState, cp15.tcr_el[3]) },
{ .name = "ELR_EL3", .state = ARM_CP_STATE_AA64,
@@ -11039,19 +11065,47 @@ static ARMGranuleSize sanitize_gran_size(ARMCPU *cpu, ARMGranuleSize gran,
return Gran64K;
}
-ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
- ARMMMUIdx mmu_idx)
+static ARMVAParameters *vap_cache(CPUARMState *env, ARMMMUIdx idx, bool sel)
+{
+ switch (idx) {
+ case ARMMMUIdx_Stage2:
+ return &env->vap_cache.stage2;
+ case ARMMMUIdx_Stage2_S:
+ return &env->vap_cache.stage2_s;
+ case ARMMMUIdx_E3:
+ return &env->vap_cache.e3;
+ case ARMMMUIdx_E2:
+ return &env->vap_cache.e2[0];
+ case ARMMMUIdx_E20_2_PAN:
+ case ARMMMUIdx_E20_2:
+ return &env->vap_cache.e2[sel];
+ case ARMMMUIdx_E10_1_PAN:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ return &env->vap_cache.e1[sel];
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_Stage1_E0:
+ return &env->vap_cache.e0[sel];
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static ARMVAParameters __attribute__((noinline))
+aa64_va_parameters_build(CPUARMState *env, ARMMMUIdx mmu_idx, bool select)
{
uint64_t tcr = regime_tcr(env, mmu_idx);
bool epd, hpd, tsz_oob, ds, ha, hd;
- int select, tsz, tbii, tbid, max_tsz, min_tsz, ps, sh;
+ int tsz, tbii, tbid, max_tsz, min_tsz, ps, sh;
ARMGranuleSize gran;
ARMCPU *cpu = env_archcpu(env);
bool stage2 = regime_is_stage2(mmu_idx);
ARMVAParameters r;
if (!regime_has_2_ranges(mmu_idx)) {
- select = 0;
+ select = false;
tsz = extract32(tcr, 0, 6);
gran = tg0_to_gran_size(extract32(tcr, 14, 2));
if (stage2) {
@@ -11069,11 +11123,6 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
} else {
bool e0pd;
- /*
- * Bit 55 is always between the two regions, and is canonical for
- * determining if address tagging is enabled.
- */
- select = extract64(va, 55, 1);
if (!select) {
tsz = extract32(tcr, 0, 6);
gran = tg0_to_gran_size(extract32(tcr, 14, 2));
@@ -11166,6 +11215,25 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
r = FIELD_DP32(r, ARMVAP, DS, ds);
r = FIELD_DP32(r, ARMVAP, HA, ha);
r = FIELD_DP32(r, ARMVAP, HD, ha && hd);
+ r = FIELD_DP32(r, ARMVAP, INIT, 1);
+ return r;
+}
+
+ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
+ ARMMMUIdx mmu_idx)
+{
+ /*
+ * Bit 55 is always between the two regions, and is canonical for
+ * determining if address tagging is enabled.
+ * Will be zapped if !regime_has_2_ranges.
+ */
+ bool select = extract64(va, 55, 1);
+ ARMVAParameters *c = vap_cache(env, mmu_idx, select);
+ ARMVAParameters r = *c;
+
+ if (unlikely(!FIELD_EX32(r, ARMVAP, INIT))) {
+ *c = r = aa64_va_parameters_build(env, mmu_idx, select);
+ }
return r;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 0/4] target/arm: Cache ARMVAParameters
2023-02-02 7:52 [PATCH 0/4] target/arm: Cache ARMVAParameters Richard Henderson
` (3 preceding siblings ...)
2023-02-02 7:52 ` [PATCH 4/4] target/arm: Cache ARMVAParameters Richard Henderson
@ 2023-02-16 6:54 ` Richard Henderson
2023-02-16 7:47 ` Philippe Mathieu-Daudé
5 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2023-02-16 6:54 UTC (permalink / raw)
To: qemu-devel; +Cc: anders.roxell, qemu-arm
Ping.
r~
On 2/1/23 21:52, Richard Henderson wrote:
> Hi Anders,
>
> I'm not well versed in tuxrun, or in how to make that work with a qemu
> binary outside of the container, so I'm not sure if I'm comparing
> apples to bananas. Can you look and see if this fixes the kselftest
> slowdown you reported?
>
> Anyway, for a boot and shutdown of your rootfs, I see:
>
> Before:
> 11.13% [.] aa64_va_parameters
> 8.38% [.] helper_lookup_tb_ptr
> 7.37% [.] pauth_computepac
> 3.79% [.] qht_lookup_custom
>
> After:
> 9.17% [.] helper_lookup_tb_ptr
> 8.05% [.] pauth_computepac
> 4.22% [.] qht_lookup_custom
> 3.68% [.] pauth_addpac
> ...
> 1.67% [.] aa64_va_parameters
>
>
> This is all due to the heavy use pauth makes of aa64_va_parameters.
> It "only" needs 2 parameters, tsz and tbi, but tsz is probably the
> most expensive part of aa64_va_parameters -- do anything about that
> and we might as well cache the whole thing.
>
> The change from struct+bitfields to uint32_t+FIELD is meant to combat
> some really ugly code that gcc produced. Seems like they should have
> compiled to the same thing, more or less, but alas.
>
>
> r~
>
>
> Richard Henderson (4):
> target/arm: Flush only required tlbs for TCR_EL[12]
> target/arm: Store tbi for both insns and data in ARMVAParameters
> target/arm: Use FIELD for ARMVAParameters
> target/arm: Cache ARMVAParameters
>
> target/arm/cpu.h | 30 +++++++
> target/arm/internals.h | 21 +----
> target/arm/helper.c | 177 ++++++++++++++++++++++++++++----------
> target/arm/pauth_helper.c | 39 +++++----
> target/arm/ptw.c | 57 ++++++------
> 5 files changed, 217 insertions(+), 107 deletions(-)
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 0/4] target/arm: Cache ARMVAParameters
2023-02-02 7:52 [PATCH 0/4] target/arm: Cache ARMVAParameters Richard Henderson
` (4 preceding siblings ...)
2023-02-16 6:54 ` [PATCH 0/4] " Richard Henderson
@ 2023-02-16 7:47 ` Philippe Mathieu-Daudé
5 siblings, 0 replies; 8+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-02-16 7:47 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: anders.roxell, qemu-arm
Hi Richard,
On 2/2/23 08:52, Richard Henderson wrote:
> Richard Henderson (4):
> target/arm: Flush only required tlbs for TCR_EL[12]
> target/arm: Store tbi for both insns and data in ARMVAParameters
> target/arm: Use FIELD for ARMVAParameters
> target/arm: Cache ARMVAParameters
Applying: target/arm: Flush only required tlbs for TCR_EL[12]
error: patch failed: target/arm/helper.c:4151
error: target/arm/helper.c: patch does not apply
Patch failed at 0001 target/arm: Flush only required tlbs for TCR_EL[12]
What is this series' base commit?
^ permalink raw reply [flat|nested] 8+ messages in thread