* [PATCH 1/6] target/ppc: Implement ASDR register for ISA v3.0 for HPT
2023-07-26 18:22 [PATCH 0/6] ppc fixes possibly for 8.1 Nicholas Piggin
@ 2023-07-26 18:22 ` Nicholas Piggin
2023-07-27 13:22 ` Cédric Le Goater
2023-07-26 18:22 ` [PATCH 2/6] target/ppc: Fix VRMA page size for ISA v3.0 Nicholas Piggin
` (5 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2023-07-26 18:22 UTC (permalink / raw)
To: Daniel Henrique Barboza
Cc: Nicholas Piggin, Cédric Le Goater, David Gibson, Greg Kurz,
Harsh Prateek Bora, qemu-ppc, qemu-devel
The ASDR register was introduced in ISA v3.0. It has not been
implemented for HPT. With HPT, ASDR is the format of the slbmte RS
operand (containing VSID), which matches the ppc_slb_t field.
Fixes: 3367c62f522b ("target/ppc: Support for POWER9 native hash")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
target/ppc/mmu-hash64.c | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index 900f906990..a0c90df3ce 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -770,7 +770,8 @@ static bool ppc_hash64_use_vrma(CPUPPCState *env)
}
}
-static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t error_code)
+static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t slb_vsid,
+ uint64_t error_code)
{
CPUPPCState *env = &POWERPC_CPU(cs)->env;
bool vpm;
@@ -782,13 +783,15 @@ static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t error_code)
}
if (vpm && !mmuidx_hv(mmu_idx)) {
cs->exception_index = POWERPC_EXCP_HISI;
+ env->spr[SPR_ASDR] = slb_vsid;
} else {
cs->exception_index = POWERPC_EXCP_ISI;
}
env->error_code = error_code;
}
-static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, uint64_t dsisr)
+static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t slb_vsid,
+ uint64_t dar, uint64_t dsisr)
{
CPUPPCState *env = &POWERPC_CPU(cs)->env;
bool vpm;
@@ -802,6 +805,7 @@ static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, uint64_t
cs->exception_index = POWERPC_EXCP_HDSI;
env->spr[SPR_HDAR] = dar;
env->spr[SPR_HDSISR] = dsisr;
+ env->spr[SPR_ASDR] = slb_vsid;
} else {
cs->exception_index = POWERPC_EXCP_DSI;
env->spr[SPR_DAR] = dar;
@@ -963,13 +967,13 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
}
switch (access_type) {
case MMU_INST_FETCH:
- ppc_hash64_set_isi(cs, mmu_idx, SRR1_PROTFAULT);
+ ppc_hash64_set_isi(cs, mmu_idx, 0, SRR1_PROTFAULT);
break;
case MMU_DATA_LOAD:
- ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_PROTFAULT);
+ ppc_hash64_set_dsi(cs, mmu_idx, 0, eaddr, DSISR_PROTFAULT);
break;
case MMU_DATA_STORE:
- ppc_hash64_set_dsi(cs, mmu_idx, eaddr,
+ ppc_hash64_set_dsi(cs, mmu_idx, 0, eaddr,
DSISR_PROTFAULT | DSISR_ISSTORE);
break;
default:
@@ -1022,7 +1026,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
/* 3. Check for segment level no-execute violation */
if (access_type == MMU_INST_FETCH && (slb->vsid & SLB_VSID_N)) {
if (guest_visible) {
- ppc_hash64_set_isi(cs, mmu_idx, SRR1_NOEXEC_GUARD);
+ ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, SRR1_NOEXEC_GUARD);
}
return false;
}
@@ -1035,13 +1039,14 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
}
switch (access_type) {
case MMU_INST_FETCH:
- ppc_hash64_set_isi(cs, mmu_idx, SRR1_NOPTE);
+ ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, SRR1_NOPTE);
break;
case MMU_DATA_LOAD:
- ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_NOPTE);
+ ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr, DSISR_NOPTE);
break;
case MMU_DATA_STORE:
- ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_NOPTE | DSISR_ISSTORE);
+ ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr,
+ DSISR_NOPTE | DSISR_ISSTORE);
break;
default:
g_assert_not_reached();
@@ -1075,7 +1080,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
if (PAGE_EXEC & ~amr_prot) {
srr1 |= SRR1_IAMR; /* Access violates virt pg class key prot */
}
- ppc_hash64_set_isi(cs, mmu_idx, srr1);
+ ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, srr1);
} else {
int dsisr = 0;
if (need_prot & ~pp_prot) {
@@ -1087,7 +1092,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
if (need_prot & ~amr_prot) {
dsisr |= DSISR_AMR;
}
- ppc_hash64_set_dsi(cs, mmu_idx, eaddr, dsisr);
+ ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr, dsisr);
}
return false;
}
--
2.40.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 1/6] target/ppc: Implement ASDR register for ISA v3.0 for HPT
2023-07-26 18:22 ` [PATCH 1/6] target/ppc: Implement ASDR register for ISA v3.0 for HPT Nicholas Piggin
@ 2023-07-27 13:22 ` Cédric Le Goater
0 siblings, 0 replies; 14+ messages in thread
From: Cédric Le Goater @ 2023-07-27 13:22 UTC (permalink / raw)
To: Nicholas Piggin, Daniel Henrique Barboza
Cc: David Gibson, Greg Kurz, Harsh Prateek Bora, qemu-ppc, qemu-devel
On 7/26/23 20:22, Nicholas Piggin wrote:
> The ASDR register was introduced in ISA v3.0. It has not been
> implemented for HPT. With HPT, ASDR is the format of the slbmte RS
> operand (containing VSID), which matches the ppc_slb_t field.
>
> Fixes: 3367c62f522b ("target/ppc: Support for POWER9 native hash")
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
According to the ISA, the Address Access Segment Descriptor Register (ASDR)
can also be set with an MCE. Anyway,
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Thanks,
C.
> ---
> target/ppc/mmu-hash64.c | 27 ++++++++++++++++-----------
> 1 file changed, 16 insertions(+), 11 deletions(-)
>
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index 900f906990..a0c90df3ce 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -770,7 +770,8 @@ static bool ppc_hash64_use_vrma(CPUPPCState *env)
> }
> }
>
> -static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t error_code)
> +static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t slb_vsid,
> + uint64_t error_code)
> {
> CPUPPCState *env = &POWERPC_CPU(cs)->env;
> bool vpm;
> @@ -782,13 +783,15 @@ static void ppc_hash64_set_isi(CPUState *cs, int mmu_idx, uint64_t error_code)
> }
> if (vpm && !mmuidx_hv(mmu_idx)) {
> cs->exception_index = POWERPC_EXCP_HISI;
> + env->spr[SPR_ASDR] = slb_vsid;
> } else {
> cs->exception_index = POWERPC_EXCP_ISI;
> }
> env->error_code = error_code;
> }
>
> -static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, uint64_t dsisr)
> +static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t slb_vsid,
> + uint64_t dar, uint64_t dsisr)
> {
> CPUPPCState *env = &POWERPC_CPU(cs)->env;
> bool vpm;
> @@ -802,6 +805,7 @@ static void ppc_hash64_set_dsi(CPUState *cs, int mmu_idx, uint64_t dar, uint64_t
> cs->exception_index = POWERPC_EXCP_HDSI;
> env->spr[SPR_HDAR] = dar;
> env->spr[SPR_HDSISR] = dsisr;
> + env->spr[SPR_ASDR] = slb_vsid;
> } else {
> cs->exception_index = POWERPC_EXCP_DSI;
> env->spr[SPR_DAR] = dar;
> @@ -963,13 +967,13 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
> }
> switch (access_type) {
> case MMU_INST_FETCH:
> - ppc_hash64_set_isi(cs, mmu_idx, SRR1_PROTFAULT);
> + ppc_hash64_set_isi(cs, mmu_idx, 0, SRR1_PROTFAULT);
> break;
> case MMU_DATA_LOAD:
> - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_PROTFAULT);
> + ppc_hash64_set_dsi(cs, mmu_idx, 0, eaddr, DSISR_PROTFAULT);
> break;
> case MMU_DATA_STORE:
> - ppc_hash64_set_dsi(cs, mmu_idx, eaddr,
> + ppc_hash64_set_dsi(cs, mmu_idx, 0, eaddr,
> DSISR_PROTFAULT | DSISR_ISSTORE);
> break;
> default:
> @@ -1022,7 +1026,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
> /* 3. Check for segment level no-execute violation */
> if (access_type == MMU_INST_FETCH && (slb->vsid & SLB_VSID_N)) {
> if (guest_visible) {
> - ppc_hash64_set_isi(cs, mmu_idx, SRR1_NOEXEC_GUARD);
> + ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, SRR1_NOEXEC_GUARD);
> }
> return false;
> }
> @@ -1035,13 +1039,14 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
> }
> switch (access_type) {
> case MMU_INST_FETCH:
> - ppc_hash64_set_isi(cs, mmu_idx, SRR1_NOPTE);
> + ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, SRR1_NOPTE);
> break;
> case MMU_DATA_LOAD:
> - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_NOPTE);
> + ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr, DSISR_NOPTE);
> break;
> case MMU_DATA_STORE:
> - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, DSISR_NOPTE | DSISR_ISSTORE);
> + ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr,
> + DSISR_NOPTE | DSISR_ISSTORE);
> break;
> default:
> g_assert_not_reached();
> @@ -1075,7 +1080,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
> if (PAGE_EXEC & ~amr_prot) {
> srr1 |= SRR1_IAMR; /* Access violates virt pg class key prot */
> }
> - ppc_hash64_set_isi(cs, mmu_idx, srr1);
> + ppc_hash64_set_isi(cs, mmu_idx, slb->vsid, srr1);
> } else {
> int dsisr = 0;
> if (need_prot & ~pp_prot) {
> @@ -1087,7 +1092,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
> if (need_prot & ~amr_prot) {
> dsisr |= DSISR_AMR;
> }
> - ppc_hash64_set_dsi(cs, mmu_idx, eaddr, dsisr);
> + ppc_hash64_set_dsi(cs, mmu_idx, slb->vsid, eaddr, dsisr);
> }
> return false;
> }
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/6] target/ppc: Fix VRMA page size for ISA v3.0
2023-07-26 18:22 [PATCH 0/6] ppc fixes possibly for 8.1 Nicholas Piggin
2023-07-26 18:22 ` [PATCH 1/6] target/ppc: Implement ASDR register for ISA v3.0 for HPT Nicholas Piggin
@ 2023-07-26 18:22 ` Nicholas Piggin
2023-07-27 13:07 ` Cédric Le Goater
2023-07-26 18:22 ` [PATCH 3/6] target/ppc: Fix pending HDEC when entering PM state Nicholas Piggin
` (4 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2023-07-26 18:22 UTC (permalink / raw)
To: Daniel Henrique Barboza
Cc: Nicholas Piggin, Cédric Le Goater, David Gibson, Greg Kurz,
Harsh Prateek Bora, qemu-ppc, qemu-devel
Until v2.07s, the VRMA page size (L||LP) was encoded in LPCR[VRMASD].
In v3.0 that moved to the partition table PS field.
The powernv machine can now run KVM HPT guests on POWER9/10 CPUs with
this fix and the patch to add ASDR.
Fixes: 3367c62f522b ("target/ppc: Support for POWER9 native hash")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
target/ppc/mmu-hash64.c | 41 +++++++++++++++++++++++++++++++++++------
1 file changed, 35 insertions(+), 6 deletions(-)
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index a0c90df3ce..7f8bbbbdb0 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -874,12 +874,41 @@ static target_ulong rmls_limit(PowerPCCPU *cpu)
return rma_sizes[rmls];
}
-static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb)
+/* Return the LLP in SLB_VSID format */
+static uint64_t get_vrma_llp(PowerPCCPU *cpu)
{
CPUPPCState *env = &cpu->env;
- target_ulong lpcr = env->spr[SPR_LPCR];
- uint32_t vrmasd = (lpcr & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT;
- target_ulong vsid = SLB_VSID_VRMA | ((vrmasd << 4) & SLB_VSID_LLP_MASK);
+ uint64_t llp;
+
+ if (env->mmu_model == POWERPC_MMU_3_00) {
+ ppc_v3_pate_t pate;
+ uint64_t ps;
+
+ /*
+ * ISA v3.0 removes the LPCR[VRMASD] field and puts the VRMA base
+ * page size (L||LP equivalent) in the PS field in the HPT partition
+ * table entry.
+ */
+ if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
+ error_report("Bad VRMA with no partition table entry");
+ return 0;
+ }
+ ps = pate.dw0 >> (63 - 58);
+ llp = ((ps & 0x4) << (63 - 55 - 2)) | ((ps & 0x3) << (63 - 59));
+
+ } else {
+ uint64_t lpcr = env->spr[SPR_LPCR];
+ target_ulong vrmasd = (lpcr & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT;
+
+ llp = (vrmasd << 4) & SLB_VSID_LLP_MASK;
+ }
+
+ return llp;
+}
+
+static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb)
+{
+ target_ulong vsid = SLB_VSID_VRMA | get_vrma_llp(cpu);
int i;
for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
@@ -897,8 +926,8 @@ static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb)
}
}
- error_report("Bad page size encoding in LPCR[VRMASD]; LPCR=0x"
- TARGET_FMT_lx, lpcr);
+ error_report("Bad VRMA page size encoding 0x" TARGET_FMT_lx,
+ get_vrma_llp(cpu));
return -1;
}
--
2.40.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/6] target/ppc: Fix VRMA page size for ISA v3.0
2023-07-26 18:22 ` [PATCH 2/6] target/ppc: Fix VRMA page size for ISA v3.0 Nicholas Piggin
@ 2023-07-27 13:07 ` Cédric Le Goater
0 siblings, 0 replies; 14+ messages in thread
From: Cédric Le Goater @ 2023-07-27 13:07 UTC (permalink / raw)
To: Nicholas Piggin, Daniel Henrique Barboza
Cc: David Gibson, Greg Kurz, Harsh Prateek Bora, qemu-ppc, qemu-devel
On 7/26/23 20:22, Nicholas Piggin wrote:
> Until v2.07s, the VRMA page size (L||LP) was encoded in LPCR[VRMASD].
> In v3.0 that moved to the partition table PS field.
>
> The powernv machine can now run KVM HPT guests on POWER9/10 CPUs with
> this fix and the patch to add ASDR.
>
> Fixes: 3367c62f522b ("target/ppc: Support for POWER9 native hash")
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> target/ppc/mmu-hash64.c | 41 +++++++++++++++++++++++++++++++++++------
> 1 file changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index a0c90df3ce..7f8bbbbdb0 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -874,12 +874,41 @@ static target_ulong rmls_limit(PowerPCCPU *cpu)
> return rma_sizes[rmls];
> }
>
> -static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb)
> +/* Return the LLP in SLB_VSID format */
> +static uint64_t get_vrma_llp(PowerPCCPU *cpu)
> {
> CPUPPCState *env = &cpu->env;
> - target_ulong lpcr = env->spr[SPR_LPCR];
> - uint32_t vrmasd = (lpcr & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT;
> - target_ulong vsid = SLB_VSID_VRMA | ((vrmasd << 4) & SLB_VSID_LLP_MASK);
> + uint64_t llp;
> +
> + if (env->mmu_model == POWERPC_MMU_3_00) {
> + ppc_v3_pate_t pate;
> + uint64_t ps;
> +
> + /*
> + * ISA v3.0 removes the LPCR[VRMASD] field and puts the VRMA base
> + * page size (L||LP equivalent) in the PS field in the HPT partition
> + * table entry.
> + */
> + if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
> + error_report("Bad VRMA with no partition table entry");
> + return 0;
> + }
> + ps = pate.dw0 >> (63 - 58);
> + llp = ((ps & 0x4) << (63 - 55 - 2)) | ((ps & 0x3) << (63 - 59));
Please add bitfield definitions for these numbers :)
> +
> + } else {
> + uint64_t lpcr = env->spr[SPR_LPCR];
> + target_ulong vrmasd = (lpcr & LPCR_VRMASD) >> LPCR_VRMASD_SHIFT;
> +
> + llp = (vrmasd << 4) & SLB_VSID_LLP_MASK;
> + }
> +
> + return llp;
> +}
> +
> +static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb)
> +{
> + target_ulong vsid = SLB_VSID_VRMA | get_vrma_llp(cpu);
May be you could introduce a 'uint64_t llp' variable instead and use it
directly in error_report and in the slb encoding test. This is minor.
Thanks,
C.
> int i;
>
> for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
> @@ -897,8 +926,8 @@ static int build_vrma_slbe(PowerPCCPU *cpu, ppc_slb_t *slb)
> }
> }
>
> - error_report("Bad page size encoding in LPCR[VRMASD]; LPCR=0x"
> - TARGET_FMT_lx, lpcr);
> + error_report("Bad VRMA page size encoding 0x" TARGET_FMT_lx,
> + get_vrma_llp(cpu));
>
> return -1;
> }
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 3/6] target/ppc: Fix pending HDEC when entering PM state
2023-07-26 18:22 [PATCH 0/6] ppc fixes possibly for 8.1 Nicholas Piggin
2023-07-26 18:22 ` [PATCH 1/6] target/ppc: Implement ASDR register for ISA v3.0 for HPT Nicholas Piggin
2023-07-26 18:22 ` [PATCH 2/6] target/ppc: Fix VRMA page size for ISA v3.0 Nicholas Piggin
@ 2023-07-26 18:22 ` Nicholas Piggin
2023-07-27 12:57 ` Cédric Le Goater
2023-07-26 18:22 ` [PATCH 4/6] hw/ppc: Avoid decrementer rounding errors Nicholas Piggin
` (3 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2023-07-26 18:22 UTC (permalink / raw)
To: Daniel Henrique Barboza
Cc: Nicholas Piggin, Cédric Le Goater, David Gibson, Greg Kurz,
Harsh Prateek Bora, qemu-ppc, qemu-devel
HDEC is defined to not wake from PM state. There is a check in the HDEC
timer to avoid setting the interrupt if we are in a PM state, but no
check on PM entry to lower HDEC if it already fired. This can cause a
HDECR wake up and QEMU abort with unsupported exception in Power Save
mode.
Fixes: 4b236b621bf ("ppc: Initial HDEC support")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
target/ppc/excp_helper.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 003805b202..9aa8e46566 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2685,6 +2685,12 @@ void helper_pminsn(CPUPPCState *env, uint32_t insn)
env->resume_as_sreset = (insn != PPC_PM_STOP) ||
(env->spr[SPR_PSSCR] & PSSCR_EC);
+ /* HDECR is not to wake from PM state, it may have already fired */
+ if (env->resume_as_sreset) {
+ PowerPCCPU *cpu = env_archcpu(env);
+ ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
+ }
+
ppc_maybe_interrupt(env);
}
#endif /* defined(TARGET_PPC64) */
--
2.40.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 3/6] target/ppc: Fix pending HDEC when entering PM state
2023-07-26 18:22 ` [PATCH 3/6] target/ppc: Fix pending HDEC when entering PM state Nicholas Piggin
@ 2023-07-27 12:57 ` Cédric Le Goater
0 siblings, 0 replies; 14+ messages in thread
From: Cédric Le Goater @ 2023-07-27 12:57 UTC (permalink / raw)
To: Nicholas Piggin, Daniel Henrique Barboza
Cc: David Gibson, Greg Kurz, Harsh Prateek Bora, qemu-ppc, qemu-devel
On 7/26/23 20:22, Nicholas Piggin wrote:
> HDEC is defined to not wake from PM state. There is a check in the HDEC
> timer to avoid setting the interrupt if we are in a PM state, but no
> check on PM entry to lower HDEC if it already fired. This can cause a
> HDECR wake up and QEMU abort with unsupported exception in Power Save
> mode.
>
> Fixes: 4b236b621bf ("ppc: Initial HDEC support")
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Thanks,
C.
> ---
> target/ppc/excp_helper.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 003805b202..9aa8e46566 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -2685,6 +2685,12 @@ void helper_pminsn(CPUPPCState *env, uint32_t insn)
> env->resume_as_sreset = (insn != PPC_PM_STOP) ||
> (env->spr[SPR_PSSCR] & PSSCR_EC);
>
> + /* HDECR is not to wake from PM state, it may have already fired */
> + if (env->resume_as_sreset) {
> + PowerPCCPU *cpu = env_archcpu(env);
> + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
> + }
> +
> ppc_maybe_interrupt(env);
> }
> #endif /* defined(TARGET_PPC64) */
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 4/6] hw/ppc: Avoid decrementer rounding errors
2023-07-26 18:22 [PATCH 0/6] ppc fixes possibly for 8.1 Nicholas Piggin
` (2 preceding siblings ...)
2023-07-26 18:22 ` [PATCH 3/6] target/ppc: Fix pending HDEC when entering PM state Nicholas Piggin
@ 2023-07-26 18:22 ` Nicholas Piggin
2023-07-26 18:22 ` [PATCH 5/6] hw/ppc: Always store the decrementer value Nicholas Piggin
` (2 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Nicholas Piggin @ 2023-07-26 18:22 UTC (permalink / raw)
To: Daniel Henrique Barboza
Cc: Nicholas Piggin, Cédric Le Goater, David Gibson, Greg Kurz,
Harsh Prateek Bora, qemu-ppc, qemu-devel
The decrementer register contains a relative time in timebase units.
When writing to DECR this is converted and stored as an absolute value
in nanosecond units, reading DECR converts back to relative timebase.
The tb<->ns conversion of the relative part can cause rounding such that
a value writen to the decrementer can read back a different, with time
held constant. This is a particular problem for a deterministic icount
and record-replay trace.
Fix this by storing the absolute value in timebase units rather than
nanoseconds. The math before:
store: decr_next = now_ns + decr * ns_per_sec / tb_per_sec
load: decr = (decr_next - now_ns) * tb_per_sec / ns_per_sec
load(store): decr = decr * ns_per_sec / tb_per_sec * tb_per_sec /
ns_per_sec
After:
store: decr_next = now_ns * tb_per_sec / ns_per_sec + decr
load: decr = decr_next - now_ns * tb_per_sec / ns_per_sec
load(store): decr = decr
Fixes: 9fddaa0c0cab ("PowerPC merge: real time TB and decrementer - faster and simpler exception handling (Jocelyn Mayer)")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
hw/ppc/ppc.c | 41 ++++++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 17 deletions(-)
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 0e0a3d93c3..fa60f76dd4 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -686,16 +686,17 @@ bool ppc_decr_clear_on_delivery(CPUPPCState *env)
static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
{
ppc_tb_t *tb_env = env->tb_env;
- int64_t decr, diff;
+ uint64_t now, n;
+ int64_t decr;
- diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- if (diff >= 0) {
- decr = muldiv64(diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
- } else if (tb_env->flags & PPC_TIMER_BOOKE) {
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ n = muldiv64(now, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
+ if (next > n && tb_env->flags & PPC_TIMER_BOOKE) {
decr = 0;
- } else {
- decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
+ } else {
+ decr = next - n;
}
+
trace_ppc_decr_load(decr);
return decr;
@@ -834,11 +835,11 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
/* Calculate the next timer event */
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
- *nextp = next;
+ next = muldiv64(now, tb_env->decr_freq, NANOSECONDS_PER_SECOND) + value;
+ *nextp = next; /* nextp is in timebase units */
/* Adjust timer */
- timer_mod(timer, next);
+ timer_mod(timer, muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq));
}
static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr,
@@ -1153,14 +1154,20 @@ static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp)
} else {
trace_ppc4xx_pit_start(ppc40x_timer->pit_reload);
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- next = now + muldiv64(ppc40x_timer->pit_reload,
- NANOSECONDS_PER_SECOND, tb_env->decr_freq);
- if (is_excp)
- next += tb_env->decr_next - now;
- if (next == now)
- next++;
+
+ if (is_excp) {
+ tb_env->decr_next += ppc40x_timer->pit_reload;
+ } else {
+ tb_env->decr_next = muldiv64(now, tb_env->decr_freq,
+ NANOSECONDS_PER_SECOND)
+ + ppc40x_timer->pit_reload;
+ }
+ next = muldiv64(tb_env->decr_next, NANOSECONDS_PER_SECOND,
+ tb_env->decr_freq);
+ if (next <= now) {
+ next = now + 1;
+ }
timer_mod(tb_env->decr_timer, next);
- tb_env->decr_next = next;
}
}
--
2.40.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/6] hw/ppc: Always store the decrementer value
2023-07-26 18:22 [PATCH 0/6] ppc fixes possibly for 8.1 Nicholas Piggin
` (3 preceding siblings ...)
2023-07-26 18:22 ` [PATCH 4/6] hw/ppc: Avoid decrementer rounding errors Nicholas Piggin
@ 2023-07-26 18:22 ` Nicholas Piggin
2023-07-27 12:26 ` Cédric Le Goater
2023-07-26 18:22 ` [PATCH 6/6] target/ppc: Migrate DECR SPR Nicholas Piggin
2023-07-28 20:05 ` [PATCH 0/6] ppc fixes possibly for 8.1 Daniel Henrique Barboza
6 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2023-07-26 18:22 UTC (permalink / raw)
To: Daniel Henrique Barboza
Cc: Nicholas Piggin, Cédric Le Goater, David Gibson, Greg Kurz,
Harsh Prateek Bora, qemu-ppc, qemu-devel
When writing a value to the decrementer that raises an exception, the
irq is raised, but the value is not stored so the store doesn't appear
to have changed the register when it is read again.
Always store the write value to the register.
Fixes: e81a982aa53 ("PPC: Clean up DECR implementation")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
hw/ppc/ppc.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index fa60f76dd4..cd1993e9c1 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -812,6 +812,11 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
return;
}
+ /* Calculate the next timer event */
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ next = muldiv64(now, tb_env->decr_freq, NANOSECONDS_PER_SECOND) + value;
+ *nextp = next; /* nextp is in timebase units */
+
/*
* Going from 1 -> 0 or 0 -> -1 is the event to generate a DEC interrupt.
*
@@ -833,11 +838,6 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
(*lower_excp)(cpu);
}
- /* Calculate the next timer event */
- now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- next = muldiv64(now, tb_env->decr_freq, NANOSECONDS_PER_SECOND) + value;
- *nextp = next; /* nextp is in timebase units */
-
/* Adjust timer */
timer_mod(timer, muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq));
}
--
2.40.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 5/6] hw/ppc: Always store the decrementer value
2023-07-26 18:22 ` [PATCH 5/6] hw/ppc: Always store the decrementer value Nicholas Piggin
@ 2023-07-27 12:26 ` Cédric Le Goater
2023-07-30 9:40 ` Nicholas Piggin
0 siblings, 1 reply; 14+ messages in thread
From: Cédric Le Goater @ 2023-07-27 12:26 UTC (permalink / raw)
To: Nicholas Piggin, Daniel Henrique Barboza
Cc: David Gibson, Greg Kurz, Harsh Prateek Bora, qemu-ppc, qemu-devel
Hello Nick,
On 7/26/23 20:22, Nicholas Piggin wrote:
> When writing a value to the decrementer that raises an exception, the
> irq is raised, but the value is not stored so the store doesn't appear
> to have changed the register when it is read again.
>
> Always store the write value to the register.
This change has a serious performance impact when a guest is run under
PowerNV. Could you please take a look ?
Thanks,
C.
PS: We should really introduce avocado tests for nested.
> Fixes: e81a982aa53 ("PPC: Clean up DECR implementation")
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> hw/ppc/ppc.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
> index fa60f76dd4..cd1993e9c1 100644
> --- a/hw/ppc/ppc.c
> +++ b/hw/ppc/ppc.c
> @@ -812,6 +812,11 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
> return;
> }
>
> + /* Calculate the next timer event */
> + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> + next = muldiv64(now, tb_env->decr_freq, NANOSECONDS_PER_SECOND) + value;
> + *nextp = next; /* nextp is in timebase units */
> +
> /*
> * Going from 1 -> 0 or 0 -> -1 is the event to generate a DEC interrupt.
> *
> @@ -833,11 +838,6 @@ static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
> (*lower_excp)(cpu);
> }
>
> - /* Calculate the next timer event */
> - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> - next = muldiv64(now, tb_env->decr_freq, NANOSECONDS_PER_SECOND) + value;
> - *nextp = next; /* nextp is in timebase units */
> -
> /* Adjust timer */
> timer_mod(timer, muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq));
> }
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 5/6] hw/ppc: Always store the decrementer value
2023-07-27 12:26 ` Cédric Le Goater
@ 2023-07-30 9:40 ` Nicholas Piggin
2023-07-30 16:18 ` Cédric Le Goater
0 siblings, 1 reply; 14+ messages in thread
From: Nicholas Piggin @ 2023-07-30 9:40 UTC (permalink / raw)
To: Cédric Le Goater, Daniel Henrique Barboza
Cc: David Gibson, Greg Kurz, Harsh Prateek Bora, qemu-ppc, qemu-devel
On Thu Jul 27, 2023 at 10:26 PM AEST, Cédric Le Goater wrote:
> Hello Nick,
>
> On 7/26/23 20:22, Nicholas Piggin wrote:
> > When writing a value to the decrementer that raises an exception, the
> > irq is raised, but the value is not stored so the store doesn't appear
> > to have changed the register when it is read again.
> >
> > Always store the write value to the register.
>
> This change has a serious performance impact when a guest is run under
> PowerNV. Could you please take a look ?
Yeah, the decrementer load doesn't sign-extend the value correctly as
it should for the large-decrementer option. It makes skiboot detect
the decrementer size as 64 bits instead of 56, and things go bad from
there. KVM seems more affected because it's saving and restoring DEC
frequently.
The fix seems simple but considering the compounding series of bugs
and issues coming up with this, I think it will be better to defer
the decrementer work until 8.2 unfortunately.
Thanks,
Nick
> Thanks,
>
> C.
>
> PS: We should really introduce avocado tests for nested.
Yeah agreed. Both for pseries and powernv, ideally.
Thanks,
Nick
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 5/6] hw/ppc: Always store the decrementer value
2023-07-30 9:40 ` Nicholas Piggin
@ 2023-07-30 16:18 ` Cédric Le Goater
0 siblings, 0 replies; 14+ messages in thread
From: Cédric Le Goater @ 2023-07-30 16:18 UTC (permalink / raw)
To: Nicholas Piggin, Daniel Henrique Barboza
Cc: David Gibson, Greg Kurz, Harsh Prateek Bora, qemu-ppc, qemu-devel
On 7/30/23 11:40, Nicholas Piggin wrote:
> On Thu Jul 27, 2023 at 10:26 PM AEST, Cédric Le Goater wrote:
>> Hello Nick,
>>
>> On 7/26/23 20:22, Nicholas Piggin wrote:
>>> When writing a value to the decrementer that raises an exception, the
>>> irq is raised, but the value is not stored so the store doesn't appear
>>> to have changed the register when it is read again.
>>>
>>> Always store the write value to the register.
>>
>> This change has a serious performance impact when a guest is run under
>> PowerNV. Could you please take a look ?
>
> Yeah, the decrementer load doesn't sign-extend the value correctly as
> it should for the large-decrementer option. It makes skiboot detect
> the decrementer size as 64 bits instead of 56, and things go bad from
> there. KVM seems more affected because it's saving and restoring DEC
> frequently.
>
> The fix seems simple but considering the compounding series of bugs
> and issues coming up with this, I think it will be better to defer
> the decrementer work until 8.2 unfortunately.
Yes. QEMU 8.1 has already a lot, fixes, tests and models [1].
>> PS: We should really introduce avocado tests for nested.
>
> Yeah agreed. Both for pseries and powernv, ideally.
The same disk image could be used for the 3 HV implementations. This would
be a nice addition to Harsh's series [2]
Thanks,
C.
[1] https://wiki.qemu.org/ChangeLog/8.1#PowerPC
[2] https://patchwork.ozlabs.org/project/qemu-ppc/list/?series=364386
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 6/6] target/ppc: Migrate DECR SPR
2023-07-26 18:22 [PATCH 0/6] ppc fixes possibly for 8.1 Nicholas Piggin
` (4 preceding siblings ...)
2023-07-26 18:22 ` [PATCH 5/6] hw/ppc: Always store the decrementer value Nicholas Piggin
@ 2023-07-26 18:22 ` Nicholas Piggin
2023-07-28 20:05 ` [PATCH 0/6] ppc fixes possibly for 8.1 Daniel Henrique Barboza
6 siblings, 0 replies; 14+ messages in thread
From: Nicholas Piggin @ 2023-07-26 18:22 UTC (permalink / raw)
To: Daniel Henrique Barboza
Cc: Nicholas Piggin, Cédric Le Goater, David Gibson, Greg Kurz,
Harsh Prateek Bora, qemu-ppc, qemu-devel
TCG does not maintain the DEC reigster in the SPR array, so it does get
migrated. TCG also needs to re-start the decrementer timer on the
destination machine.
Load and store the decrementer into the SPR when migrating. This works
for the level-triggered (book3s) decrementer, and should be compatible
with existing KVM machines that do keep the DEC value there.
This fixes lost decrementer interrupt on migration that can cause
hangs, as well as other problems including record-replay bugs.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
target/ppc/machine.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index 134b16c625..ebb37e07d0 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -209,6 +209,14 @@ static int cpu_pre_save(void *opaque)
/* Used to retain migration compatibility for pre 6.0 for 601 machines. */
env->hflags_compat_nmsr = 0;
+ if (tcg_enabled()) {
+ /*
+ * TCG does not maintain the DECR spr (unlike KVM) so have to save
+ * it here.
+ */
+ env->spr[SPR_DECR] = cpu_ppc_load_decr(env);
+ }
+
return 0;
}
@@ -314,6 +322,12 @@ static int cpu_post_load(void *opaque, int version_id)
post_load_update_msr(env);
if (tcg_enabled()) {
+ /*
+ * TCG needs to re-start the decrementer timer and/or raise the
+ * interrupt. This works for level-triggered decrementer. Edge
+ * triggered types (including HDEC) would need to carry more state.
+ */
+ cpu_ppc_store_decr(env, env->spr[SPR_DECR]);
pmu_mmcr01_updated(env);
}
--
2.40.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 0/6] ppc fixes possibly for 8.1
2023-07-26 18:22 [PATCH 0/6] ppc fixes possibly for 8.1 Nicholas Piggin
` (5 preceding siblings ...)
2023-07-26 18:22 ` [PATCH 6/6] target/ppc: Migrate DECR SPR Nicholas Piggin
@ 2023-07-28 20:05 ` Daniel Henrique Barboza
6 siblings, 0 replies; 14+ messages in thread
From: Daniel Henrique Barboza @ 2023-07-28 20:05 UTC (permalink / raw)
To: Nicholas Piggin
Cc: Cédric Le Goater, David Gibson, Greg Kurz,
Harsh Prateek Bora, qemu-ppc, qemu-devel
On 7/26/23 15:22, Nicholas Piggin wrote:
> Sorry for the delay following up on the fixes, I got sucked down
> the decrementer rabbit hole that took longer than expected.
>
> Question about what is suitable for merge at this time and what
> should be stable. The first 3 have caused crashes or hangs running
> Linux and other software. Second 3 fix some issues with dec that
> could cause problems, especially with migration. But they affect
> more machines in more complex ways than the first 3.
>
> No changes to the first 3 already posted except to add Fixes:
> tags.
Patches 1 and 3 queued in ppc-next. Thanks,
Daniel
>
> Thanks,
> Nick
>
> Nicholas Piggin (6):
> target/ppc: Implement ASDR register for ISA v3.0 for HPT
> target/ppc: Fix VRMA page size for ISA v3.0
> target/ppc: Fix pending HDEC when entering PM state
> hw/ppc: Avoid decrementer rounding errors
> hw/ppc: Always store the decrementer value
> target/ppc: Migrate DECR SPR
>
> hw/ppc/ppc.c | 47 +++++++++++++++------------
> target/ppc/excp_helper.c | 6 ++++
> target/ppc/machine.c | 14 +++++++++
> target/ppc/mmu-hash64.c | 68 ++++++++++++++++++++++++++++++----------
> 4 files changed, 98 insertions(+), 37 deletions(-)
>
^ permalink raw reply [flat|nested] 14+ messages in thread