* [PATCH] spapr: avoid overhead of finding vhyp class in critical operations
@ 2024-02-24 7:33 Nicholas Piggin
2024-02-26 3:01 ` David Gibson
2024-03-12 6:38 ` Harsh Prateek Bora
From: Nicholas Piggin @ 2024-02-24 7:33 UTC (permalink / raw)
To: qemu-ppc
Cc: Nicholas Piggin, qemu-devel, BALATON Zoltan, David Gibson,
Harsh Prateek Bora, Daniel Henrique Barboza
PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like
interrupts and TLB misses and is quite costly. Running the
kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG
TLB and spends a lot of time in TLB and page table walking code. The
test takes 67 seconds to complete with a lot of time being spent in
code related to finding the vhyp class:
12.01% [.] g_str_hash
8.94% [.] g_hash_table_lookup
8.06% [.] object_class_dynamic_cast
6.21% [.] address_space_ldq
4.94% [.] __strcmp_avx2
4.28% [.] tlb_set_page_full
4.08% [.] address_space_translate_internal
3.17% [.] object_class_dynamic_cast_assert
2.84% [.] ppc_radix64_xlate
Keep a pointer to the class and avoid this lookup. This reduces the
execution time to 40 seconds.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
This feels a bit ugly, but the performance problem of looking up the
class in fast paths can't be ignored. Is there a "nicer" way to get the
same result?
Thanks,
Nick
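For reference, the shape of the change condensed into one place (illustrative
only, nothing beyond what the diff below adds; the elided body of
cpu_ppc_set_vhyp() is unchanged):

  /* The interface class is resolved with a single QOM cast when the
   * virtual hypervisor is attached to the CPU... */
  void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
  {
      cpu->vhyp = vhyp;
      cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp);
      ...
  }

  /* ...so the fast paths (interrupt delivery, hash/radix translation)
   * call through the cached pointer instead of re-deriving the class,
   * which is where the g_str_hash() / object_class_dynamic_cast() time
   * in the profile above was going. */
  if (lev == 1 && cpu->vhyp) {
      cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
  }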
target/ppc/cpu.h | 3 ++-
target/ppc/mmu-book3s-v3.h | 4 +---
hw/ppc/pegasos2.c | 1 +
target/ppc/cpu_init.c | 9 +++------
target/ppc/excp_helper.c | 16 ++++------------
target/ppc/kvm.c | 4 +---
target/ppc/mmu-hash64.c | 16 ++++------------
target/ppc/mmu-radix64.c | 4 +---
8 files changed, 17 insertions(+), 40 deletions(-)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index ec14574d14..eb85d9aa71 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1437,6 +1437,7 @@ struct ArchCPU {
int vcpu_id;
uint32_t compat_pvr;
PPCVirtualHypervisor *vhyp;
+ PPCVirtualHypervisorClass *vhyp_class;
void *machine_data;
int32_t node_id; /* NUMA node this CPU belongs to */
PPCHash64Options *hash64_opts;
@@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass,
static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu)
{
- return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu);
+ return cpu->vhyp_class->cpu_in_nested(cpu);
}
#endif /* CONFIG_USER_ONLY */
diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
index 674377a19e..f3f7993958 100644
--- a/target/ppc/mmu-book3s-v3.h
+++ b/target/ppc/mmu-book3s-v3.h
@@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
uint64_t base;
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- return vhc->hpt_mask(cpu->vhyp);
+ return cpu->vhyp_class->hpt_mask(cpu->vhyp);
}
if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
ppc_v3_pate_t pate;
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index 04d6decb2b..c22e8b336d 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason)
machine->fdt = fdt;
pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine);
+ pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp);
}
enum pegasos2_rtas_tokens {
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 9bccddb350..63d0094024 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
CPUPPCState *env = &cpu->env;
cpu->vhyp = vhyp;
+ cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp);
/*
* With a virtual hypervisor mode we never allow the CPU to go
@@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs)
PowerPCCPU *cpu = POWERPC_CPU(cs);
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- vhc->cpu_exec_enter(cpu->vhyp, cpu);
+ cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu);
}
}
@@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs)
PowerPCCPU *cpu = POWERPC_CPU(cs);
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- vhc->cpu_exec_exit(cpu->vhyp, cpu);
+ cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu);
}
}
#endif /* CONFIG_TCG */
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 98952de267..445350488c 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
* HV mode, we need to keep hypercall support.
*/
if (lev == 1 && cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- vhc->hypercall(cpu->vhyp, cpu);
+ cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
powerpc_reset_excp_state(cpu);
return;
}
@@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
* HV mode, we need to keep hypercall support.
*/
if (lev == 1 && cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- vhc->hypercall(cpu->vhyp, cpu);
+ cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
powerpc_reset_excp_state(cpu);
return;
}
@@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
/* "PAPR mode" built-in hypercall emulation */
if (lev == 1 && books_vhyp_handles_hcall(cpu)) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- vhc->hypercall(cpu->vhyp, cpu);
+ cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
powerpc_reset_excp_state(cpu);
return;
}
@@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
}
if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
/* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */
- vhc->deliver_hv_excp(cpu, excp);
+ cpu->vhyp_class->deliver_hv_excp(cpu, excp);
powerpc_reset_excp_state(cpu);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 26fa9d0575..5b5b96ab6b 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu)
sregs.pvr = env->spr[SPR_PVR];
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
+ sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp);
} else {
sregs.u.s.sdr1 = env->spr[SPR_SDR1];
}
diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index d645c0bb94..196b4b2a48 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
const ppc_hash_pte64_t *hptes;
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- return vhc->map_hptes(cpu->vhyp, ptex, n);
+ return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n);
}
base = ppc_hash64_hpt_base(cpu);
@@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
hwaddr ptex, int n)
{
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n);
+ cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n);
return;
}
@@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R;
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- vhc->hpte_set_r(cpu->vhyp, ptex, pte1);
+ cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1);
return;
}
base = ppc_hash64_hpt_base(cpu);
@@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C;
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc =
- PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- vhc->hpte_set_c(cpu->vhyp, ptex, pte1);
+ cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1);
return;
}
base = ppc_hash64_hpt_base(cpu);
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 5823e039e6..496ba87a95 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
/* Get Partition Table */
if (cpu->vhyp) {
- PPCVirtualHypervisorClass *vhc;
- vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
- if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
+ if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
if (guest_visible) {
ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr,
DSISR_R_BADCONFIG);
--
2.42.0
* Re: [PATCH] spapr: avoid overhead of finding vhyp class in critical operations
2024-02-24 7:33 [PATCH] spapr: avoid overhead of finding vhyp class in critical operations Nicholas Piggin
@ 2024-02-26 3:01 ` David Gibson
2024-03-12 6:38 ` Harsh Prateek Bora
From: David Gibson @ 2024-02-26 3:01 UTC (permalink / raw)
To: Nicholas Piggin
Cc: qemu-ppc, qemu-devel, BALATON Zoltan, Harsh Prateek Bora,
Daniel Henrique Barboza
On Sat, Feb 24, 2024 at 05:33:59PM +1000, Nicholas Piggin wrote:
> PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like
> interrupts and TLB misses and is quite costly. Running the
> kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG
> TLB and spends a lot of time in TLB and page table walking code. The
> test takes 67 seconds to complete with a lot of time being spent in
> code related to finding the vhyp class:
>
> 12.01% [.] g_str_hash
> 8.94% [.] g_hash_table_lookup
> 8.06% [.] object_class_dynamic_cast
> 6.21% [.] address_space_ldq
> 4.94% [.] __strcmp_avx2
> 4.28% [.] tlb_set_page_full
> 4.08% [.] address_space_translate_internal
> 3.17% [.] object_class_dynamic_cast_assert
> 2.84% [.] ppc_radix64_xlate
>
> Keep a pointer to the class and avoid this lookup. This reduces the
> execution time to 40 seconds.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> This feels a bit ugly, but the performance problem of looking up the
> class in fast paths can't be ignored. Is there a "nicer" way to get the
> same result?
Not one I'm aware of, unfortunately.
>
> Thanks,
> Nick
>
> target/ppc/cpu.h | 3 ++-
> target/ppc/mmu-book3s-v3.h | 4 +---
> hw/ppc/pegasos2.c | 1 +
> target/ppc/cpu_init.c | 9 +++------
> target/ppc/excp_helper.c | 16 ++++------------
> target/ppc/kvm.c | 4 +---
> target/ppc/mmu-hash64.c | 16 ++++------------
> target/ppc/mmu-radix64.c | 4 +---
> 8 files changed, 17 insertions(+), 40 deletions(-)
>
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index ec14574d14..eb85d9aa71 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1437,6 +1437,7 @@ struct ArchCPU {
> int vcpu_id;
> uint32_t compat_pvr;
> PPCVirtualHypervisor *vhyp;
> + PPCVirtualHypervisorClass *vhyp_class;
> void *machine_data;
> int32_t node_id; /* NUMA node this CPU belongs to */
> PPCHash64Options *hash64_opts;
> @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass,
>
> static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu)
> {
> - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu);
> + return cpu->vhyp_class->cpu_in_nested(cpu);
> }
> #endif /* CONFIG_USER_ONLY */
>
> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> index 674377a19e..f3f7993958 100644
> --- a/target/ppc/mmu-book3s-v3.h
> +++ b/target/ppc/mmu-book3s-v3.h
> @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
> uint64_t base;
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - return vhc->hpt_mask(cpu->vhyp);
> + return cpu->vhyp_class->hpt_mask(cpu->vhyp);
> }
> if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
> ppc_v3_pate_t pate;
> diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
> index 04d6decb2b..c22e8b336d 100644
> --- a/hw/ppc/pegasos2.c
> +++ b/hw/ppc/pegasos2.c
> @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason)
> machine->fdt = fdt;
>
> pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine);
> + pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp);
> }
>
> enum pegasos2_rtas_tokens {
> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
> index 9bccddb350..63d0094024 100644
> --- a/target/ppc/cpu_init.c
> +++ b/target/ppc/cpu_init.c
> @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
> CPUPPCState *env = &cpu->env;
>
> cpu->vhyp = vhyp;
> + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp);
>
> /*
> * With a virtual hypervisor mode we never allow the CPU to go
> @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs)
> PowerPCCPU *cpu = POWERPC_CPU(cs);
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->cpu_exec_enter(cpu->vhyp, cpu);
> + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu);
> }
> }
>
> @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs)
> PowerPCCPU *cpu = POWERPC_CPU(cs);
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->cpu_exec_exit(cpu->vhyp, cpu);
> + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu);
> }
> }
> #endif /* CONFIG_TCG */
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 98952de267..445350488c 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
> * HV mode, we need to keep hypercall support.
> */
> if (lev == 1 && cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hypercall(cpu->vhyp, cpu);
> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> powerpc_reset_excp_state(cpu);
> return;
> }
> @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
> * HV mode, we need to keep hypercall support.
> */
> if (lev == 1 && cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hypercall(cpu->vhyp, cpu);
> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> powerpc_reset_excp_state(cpu);
> return;
> }
> @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
>
> /* "PAPR mode" built-in hypercall emulation */
> if (lev == 1 && books_vhyp_handles_hcall(cpu)) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hypercall(cpu->vhyp, cpu);
> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> powerpc_reset_excp_state(cpu);
> return;
> }
> @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
> }
>
> if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */
> - vhc->deliver_hv_excp(cpu, excp);
> + cpu->vhyp_class->deliver_hv_excp(cpu, excp);
>
> powerpc_reset_excp_state(cpu);
>
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 26fa9d0575..5b5b96ab6b 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu)
> sregs.pvr = env->spr[SPR_PVR];
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
> + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp);
> } else {
> sregs.u.s.sdr1 = env->spr[SPR_SDR1];
> }
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index d645c0bb94..196b4b2a48 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
> const ppc_hash_pte64_t *hptes;
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - return vhc->map_hptes(cpu->vhyp, ptex, n);
> + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n);
> }
> base = ppc_hash64_hpt_base(cpu);
>
> @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
> hwaddr ptex, int n)
> {
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n);
> + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n);
> return;
> }
>
> @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
> hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R;
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hpte_set_r(cpu->vhyp, ptex, pte1);
> + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1);
> return;
> }
> base = ppc_hash64_hpt_base(cpu);
> @@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
> hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C;
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hpte_set_c(cpu->vhyp, ptex, pte1);
> + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1);
> return;
> }
> base = ppc_hash64_hpt_base(cpu);
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 5823e039e6..496ba87a95 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
>
> /* Get Partition Table */
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc;
> - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
> + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
> if (guest_visible) {
> ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr,
> DSISR_R_BADCONFIG);
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
* Re: [PATCH] spapr: avoid overhead of finding vhyp class in critical operations
2024-02-24 7:33 [PATCH] spapr: avoid overhead of finding vhyp class in critical operations Nicholas Piggin
2024-02-26 3:01 ` David Gibson
@ 2024-03-12 6:38 ` Harsh Prateek Bora
2024-03-12 8:48 ` Nicholas Piggin
From: Harsh Prateek Bora @ 2024-03-12 6:38 UTC (permalink / raw)
To: Nicholas Piggin, qemu-ppc
Cc: qemu-devel, BALATON Zoltan, David Gibson, Daniel Henrique Barboza
Hi Nick,
One minor comment below:
On 2/24/24 13:03, Nicholas Piggin wrote:
> PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like
> interrupts and TLB misses and is quite costly. Running the
> kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG
> TLB and spends a lot of time in TLB and page table walking code. The
> test takes 67 seconds to complete with a lot of time being spent in
> code related to finding the vhyp class:
>
> 12.01% [.] g_str_hash
> 8.94% [.] g_hash_table_lookup
> 8.06% [.] object_class_dynamic_cast
> 6.21% [.] address_space_ldq
> 4.94% [.] __strcmp_avx2
> 4.28% [.] tlb_set_page_full
> 4.08% [.] address_space_translate_internal
> 3.17% [.] object_class_dynamic_cast_assert
> 2.84% [.] ppc_radix64_xlate
>
> Keep a pointer to the class and avoid this lookup. This reduces the
> execution time to 40 seconds.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> This feels a bit ugly, but the performance problem of looking up the
> class in fast paths can't be ignored. Is there a "nicer" way to get the
> same result?
>
> Thanks,
> Nick
>
> target/ppc/cpu.h | 3 ++-
> target/ppc/mmu-book3s-v3.h | 4 +---
> hw/ppc/pegasos2.c | 1 +
> target/ppc/cpu_init.c | 9 +++------
> target/ppc/excp_helper.c | 16 ++++------------
> target/ppc/kvm.c | 4 +---
> target/ppc/mmu-hash64.c | 16 ++++------------
> target/ppc/mmu-radix64.c | 4 +---
> 8 files changed, 17 insertions(+), 40 deletions(-)
>
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index ec14574d14..eb85d9aa71 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1437,6 +1437,7 @@ struct ArchCPU {
> int vcpu_id;
> uint32_t compat_pvr;
> PPCVirtualHypervisor *vhyp;
> + PPCVirtualHypervisorClass *vhyp_class;
> void *machine_data;
> int32_t node_id; /* NUMA node this CPU belongs to */
> PPCHash64Options *hash64_opts;
> @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass,
>
> static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu)
> {
> - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu);
> + return cpu->vhyp_class->cpu_in_nested(cpu);
> }
> #endif /* CONFIG_USER_ONLY */
>
> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> index 674377a19e..f3f7993958 100644
> --- a/target/ppc/mmu-book3s-v3.h
> +++ b/target/ppc/mmu-book3s-v3.h
> @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
> uint64_t base;
>
> if (cpu->vhyp) {
All the checks for cpu->vhyp need to be changed to check for
cpu->vhyp_class now, for all such instances.
With that,
Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - return vhc->hpt_mask(cpu->vhyp);
> + return cpu->vhyp_class->hpt_mask(cpu->vhyp);
> }
> if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
> ppc_v3_pate_t pate;
> diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
> index 04d6decb2b..c22e8b336d 100644
> --- a/hw/ppc/pegasos2.c
> +++ b/hw/ppc/pegasos2.c
> @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason)
> machine->fdt = fdt;
>
> pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine);
> + pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp);
> }
>
> enum pegasos2_rtas_tokens {
> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
> index 9bccddb350..63d0094024 100644
> --- a/target/ppc/cpu_init.c
> +++ b/target/ppc/cpu_init.c
> @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
> CPUPPCState *env = &cpu->env;
>
> cpu->vhyp = vhyp;
> + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp);
>
> /*
> * With a virtual hypervisor mode we never allow the CPU to go
> @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs)
> PowerPCCPU *cpu = POWERPC_CPU(cs);
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->cpu_exec_enter(cpu->vhyp, cpu);
> + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu);
> }
> }
>
> @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs)
> PowerPCCPU *cpu = POWERPC_CPU(cs);
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->cpu_exec_exit(cpu->vhyp, cpu);
> + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu);
> }
> }
> #endif /* CONFIG_TCG */
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 98952de267..445350488c 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
> * HV mode, we need to keep hypercall support.
> */
> if (lev == 1 && cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hypercall(cpu->vhyp, cpu);
> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> powerpc_reset_excp_state(cpu);
> return;
> }
> @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
> * HV mode, we need to keep hypercall support.
> */
> if (lev == 1 && cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hypercall(cpu->vhyp, cpu);
> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> powerpc_reset_excp_state(cpu);
> return;
> }
> @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
>
> /* "PAPR mode" built-in hypercall emulation */
> if (lev == 1 && books_vhyp_handles_hcall(cpu)) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hypercall(cpu->vhyp, cpu);
> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> powerpc_reset_excp_state(cpu);
> return;
> }
> @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
> }
>
> if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */
> - vhc->deliver_hv_excp(cpu, excp);
> + cpu->vhyp_class->deliver_hv_excp(cpu, excp);
>
> powerpc_reset_excp_state(cpu);
>
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 26fa9d0575..5b5b96ab6b 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu)
> sregs.pvr = env->spr[SPR_PVR];
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
> + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp);
> } else {
> sregs.u.s.sdr1 = env->spr[SPR_SDR1];
> }
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index d645c0bb94..196b4b2a48 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
> const ppc_hash_pte64_t *hptes;
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - return vhc->map_hptes(cpu->vhyp, ptex, n);
> + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n);
> }
> base = ppc_hash64_hpt_base(cpu);
>
> @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
> hwaddr ptex, int n)
> {
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n);
> + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n);
> return;
> }
>
> @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
> hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R;
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hpte_set_r(cpu->vhyp, ptex, pte1);
> + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1);
> return;
> }
> base = ppc_hash64_hpt_base(cpu);
> @@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
> hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C;
>
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc =
> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - vhc->hpte_set_c(cpu->vhyp, ptex, pte1);
> + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1);
> return;
> }
> base = ppc_hash64_hpt_base(cpu);
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 5823e039e6..496ba87a95 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
>
> /* Get Partition Table */
> if (cpu->vhyp) {
> - PPCVirtualHypervisorClass *vhc;
> - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
> + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
> if (guest_visible) {
> ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr,
> DSISR_R_BADCONFIG);
* Re: [PATCH] spapr: avoid overhead of finding vhyp class in critical operations
2024-03-12 6:38 ` Harsh Prateek Bora
@ 2024-03-12 8:48 ` Nicholas Piggin
2024-03-12 8:56 ` Harsh Prateek Bora
From: Nicholas Piggin @ 2024-03-12 8:48 UTC (permalink / raw)
To: Harsh Prateek Bora, qemu-ppc
Cc: qemu-devel, BALATON Zoltan, David Gibson, Daniel Henrique Barboza
On Tue Mar 12, 2024 at 4:38 PM AEST, Harsh Prateek Bora wrote:
> Hi Nick,
>
> One minor comment below:
>
> On 2/24/24 13:03, Nicholas Piggin wrote:
> > PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like
> > interrupts and TLB misses and is quite costly. Running the
> > kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG
> > TLB and spends a lot of time in TLB and page table walking code. The
> > test takes 67 seconds to complete with a lot of time being spent in
> > code related to finding the vhyp class:
> >
> > 12.01% [.] g_str_hash
> > 8.94% [.] g_hash_table_lookup
> > 8.06% [.] object_class_dynamic_cast
> > 6.21% [.] address_space_ldq
> > 4.94% [.] __strcmp_avx2
> > 4.28% [.] tlb_set_page_full
> > 4.08% [.] address_space_translate_internal
> > 3.17% [.] object_class_dynamic_cast_assert
> > 2.84% [.] ppc_radix64_xlate
> >
> > Keep a pointer to the class and avoid this lookup. This reduces the
> > execution time to 40 seconds.
> >
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> > This feels a bit ugly, but the performance problem of looking up the
> > class in fast paths can't be ignored. Is there a "nicer" way to get the
> > same result?
> >
> > Thanks,
> > Nick
> >
> > target/ppc/cpu.h | 3 ++-
> > target/ppc/mmu-book3s-v3.h | 4 +---
> > hw/ppc/pegasos2.c | 1 +
> > target/ppc/cpu_init.c | 9 +++------
> > target/ppc/excp_helper.c | 16 ++++------------
> > target/ppc/kvm.c | 4 +---
> > target/ppc/mmu-hash64.c | 16 ++++------------
> > target/ppc/mmu-radix64.c | 4 +---
> > 8 files changed, 17 insertions(+), 40 deletions(-)
> >
> > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > index ec14574d14..eb85d9aa71 100644
> > --- a/target/ppc/cpu.h
> > +++ b/target/ppc/cpu.h
> > @@ -1437,6 +1437,7 @@ struct ArchCPU {
> > int vcpu_id;
> > uint32_t compat_pvr;
> > PPCVirtualHypervisor *vhyp;
> > + PPCVirtualHypervisorClass *vhyp_class;
> > void *machine_data;
> > int32_t node_id; /* NUMA node this CPU belongs to */
> > PPCHash64Options *hash64_opts;
> > @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass,
> >
> > static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu)
> > {
> > - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu);
> > + return cpu->vhyp_class->cpu_in_nested(cpu);
> > }
> > #endif /* CONFIG_USER_ONLY */
> >
> > diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> > index 674377a19e..f3f7993958 100644
> > --- a/target/ppc/mmu-book3s-v3.h
> > +++ b/target/ppc/mmu-book3s-v3.h
> > @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
> > uint64_t base;
> >
> > if (cpu->vhyp) {
>
> All the checks for cpu->vhyp need to be changed to check for
> cpu->vhyp_class now, for all such instances.
It wasn't supposed to, because vhyp != NULL implies vhyp_class != NULL.
It's supposed to be an equivalent transformation just changing the
lookup function.
Okay to leave it as is?
Thanks,
Nick
>
> With that,
>
> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
>
>
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - return vhc->hpt_mask(cpu->vhyp);
> > + return cpu->vhyp_class->hpt_mask(cpu->vhyp);
> > }
> > if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
> > ppc_v3_pate_t pate;
> > diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
> > index 04d6decb2b..c22e8b336d 100644
> > --- a/hw/ppc/pegasos2.c
> > +++ b/hw/ppc/pegasos2.c
> > @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason)
> > machine->fdt = fdt;
> >
> > pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine);
> > + pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp);
> > }
> >
> > enum pegasos2_rtas_tokens {
> > diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
> > index 9bccddb350..63d0094024 100644
> > --- a/target/ppc/cpu_init.c
> > +++ b/target/ppc/cpu_init.c
> > @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
> > CPUPPCState *env = &cpu->env;
> >
> > cpu->vhyp = vhyp;
> > + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp);
> >
> > /*
> > * With a virtual hypervisor mode we never allow the CPU to go
> > @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs)
> > PowerPCCPU *cpu = POWERPC_CPU(cs);
> >
> > if (cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - vhc->cpu_exec_enter(cpu->vhyp, cpu);
> > + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu);
> > }
> > }
> >
> > @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs)
> > PowerPCCPU *cpu = POWERPC_CPU(cs);
> >
> > if (cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - vhc->cpu_exec_exit(cpu->vhyp, cpu);
> > + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu);
> > }
> > }
> > #endif /* CONFIG_TCG */
> > diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> > index 98952de267..445350488c 100644
> > --- a/target/ppc/excp_helper.c
> > +++ b/target/ppc/excp_helper.c
> > @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
> > * HV mode, we need to keep hypercall support.
> > */
> > if (lev == 1 && cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - vhc->hypercall(cpu->vhyp, cpu);
> > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> > powerpc_reset_excp_state(cpu);
> > return;
> > }
> > @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
> > * HV mode, we need to keep hypercall support.
> > */
> > if (lev == 1 && cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - vhc->hypercall(cpu->vhyp, cpu);
> > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> > powerpc_reset_excp_state(cpu);
> > return;
> > }
> > @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
> >
> > /* "PAPR mode" built-in hypercall emulation */
> > if (lev == 1 && books_vhyp_handles_hcall(cpu)) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - vhc->hypercall(cpu->vhyp, cpu);
> > + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
> > powerpc_reset_excp_state(cpu);
> > return;
> > }
> > @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
> > }
> >
> > if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */
> > - vhc->deliver_hv_excp(cpu, excp);
> > + cpu->vhyp_class->deliver_hv_excp(cpu, excp);
> >
> > powerpc_reset_excp_state(cpu);
> >
> > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> > index 26fa9d0575..5b5b96ab6b 100644
> > --- a/target/ppc/kvm.c
> > +++ b/target/ppc/kvm.c
> > @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu)
> > sregs.pvr = env->spr[SPR_PVR];
> >
> > if (cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
> > + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp);
> > } else {
> > sregs.u.s.sdr1 = env->spr[SPR_SDR1];
> > }
> > diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> > index d645c0bb94..196b4b2a48 100644
> > --- a/target/ppc/mmu-hash64.c
> > +++ b/target/ppc/mmu-hash64.c
> > @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
> > const ppc_hash_pte64_t *hptes;
> >
> > if (cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - return vhc->map_hptes(cpu->vhyp, ptex, n);
> > + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n);
> > }
> > base = ppc_hash64_hpt_base(cpu);
> >
> > @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
> > hwaddr ptex, int n)
> > {
> > if (cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n);
> > + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n);
> > return;
> > }
> >
> > @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
> > hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R;
> >
> > if (cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - vhc->hpte_set_r(cpu->vhyp, ptex, pte1);
> > + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1);
> > return;
> > }
> > base = ppc_hash64_hpt_base(cpu);
> > @@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
> > hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C;
> >
> > if (cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc =
> > - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - vhc->hpte_set_c(cpu->vhyp, ptex, pte1);
> > + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1);
> > return;
> > }
> > base = ppc_hash64_hpt_base(cpu);
> > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> > index 5823e039e6..496ba87a95 100644
> > --- a/target/ppc/mmu-radix64.c
> > +++ b/target/ppc/mmu-radix64.c
> > @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
> >
> > /* Get Partition Table */
> > if (cpu->vhyp) {
> > - PPCVirtualHypervisorClass *vhc;
> > - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
> > - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
> > + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
> > if (guest_visible) {
> > ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr,
> > DSISR_R_BADCONFIG);
* Re: [PATCH] spapr: avoid overhead of finding vhyp class in critical operations
2024-03-12 8:48 ` Nicholas Piggin
@ 2024-03-12 8:56 ` Harsh Prateek Bora
2024-03-12 10:40 ` Nicholas Piggin
From: Harsh Prateek Bora @ 2024-03-12 8:56 UTC (permalink / raw)
To: Nicholas Piggin, qemu-ppc
Cc: qemu-devel, BALATON Zoltan, David Gibson, Daniel Henrique Barboza
On 3/12/24 14:18, Nicholas Piggin wrote:
> On Tue Mar 12, 2024 at 4:38 PM AEST, Harsh Prateek Bora wrote:
>> Hi Nick,
>>
>> One minor comment below:
>>
>> On 2/24/24 13:03, Nicholas Piggin wrote:
>>> PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like
>>> interrupts and TLB misses and is quite costly. Running the
>>> kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG
>>> TLB and spends a lot of time in TLB and page table walking code. The
>>> test takes 67 seconds to complete with a lot of time being spent in
>>> code related to finding the vhyp class:
>>>
>>> 12.01% [.] g_str_hash
>>> 8.94% [.] g_hash_table_lookup
>>> 8.06% [.] object_class_dynamic_cast
>>> 6.21% [.] address_space_ldq
>>> 4.94% [.] __strcmp_avx2
>>> 4.28% [.] tlb_set_page_full
>>> 4.08% [.] address_space_translate_internal
>>> 3.17% [.] object_class_dynamic_cast_assert
>>> 2.84% [.] ppc_radix64_xlate
>>>
>>> Keep a pointer to the class and avoid this lookup. This reduces the
>>> execution time to 40 seconds.
>>>
>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>> This feels a bit ugly, but the performance problem of looking up the
>>> class in fast paths can't be ignored. Is there a "nicer" way to get the
>>> same result?
>>>
>>> Thanks,
>>> Nick
>>>
>>> target/ppc/cpu.h | 3 ++-
>>> target/ppc/mmu-book3s-v3.h | 4 +---
>>> hw/ppc/pegasos2.c | 1 +
>>> target/ppc/cpu_init.c | 9 +++------
>>> target/ppc/excp_helper.c | 16 ++++------------
>>> target/ppc/kvm.c | 4 +---
>>> target/ppc/mmu-hash64.c | 16 ++++------------
>>> target/ppc/mmu-radix64.c | 4 +---
>>> 8 files changed, 17 insertions(+), 40 deletions(-)
>>>
>>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>>> index ec14574d14..eb85d9aa71 100644
>>> --- a/target/ppc/cpu.h
>>> +++ b/target/ppc/cpu.h
>>> @@ -1437,6 +1437,7 @@ struct ArchCPU {
>>> int vcpu_id;
>>> uint32_t compat_pvr;
>>> PPCVirtualHypervisor *vhyp;
>>> + PPCVirtualHypervisorClass *vhyp_class;
>>> void *machine_data;
>>> int32_t node_id; /* NUMA node this CPU belongs to */
>>> PPCHash64Options *hash64_opts;
>>> @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass,
>>>
>>> static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu)
>>> {
>>> - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu);
>>> + return cpu->vhyp_class->cpu_in_nested(cpu);
>>> }
>>> #endif /* CONFIG_USER_ONLY */
>>>
>>> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
>>> index 674377a19e..f3f7993958 100644
>>> --- a/target/ppc/mmu-book3s-v3.h
>>> +++ b/target/ppc/mmu-book3s-v3.h
>>> @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
>>> uint64_t base;
>>>
>>> if (cpu->vhyp) {
>>
>> All the checks for cpu->vhyp need to be changed to check for
>> cpu->vhyp_class now, for all such instances.
>
> It wasn't supposed to, because vhyp != NULL implies vhyp_class != NULL.
> It's supposed to be an equivalent transformation just changing the
> lookup function.
I agree, but it's not just that it looks a bit odd; my worry is that if a
future change caused vhyp_class to be NULL before control reaches here,
this check wouldn't really serve its purpose. Anyway, it's not a mandatory
requirement for now, so I shall leave it to your choice.
regards,
Harsh
>
> Okay to leave it as is?
>
> Thanks,
> Nick
>
>>
>> With that,
>>
>> Reviewed-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
>>
>>
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - return vhc->hpt_mask(cpu->vhyp);
>>> + return cpu->vhyp_class->hpt_mask(cpu->vhyp);
>>> }
>>> if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
>>> ppc_v3_pate_t pate;
>>> diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
>>> index 04d6decb2b..c22e8b336d 100644
>>> --- a/hw/ppc/pegasos2.c
>>> +++ b/hw/ppc/pegasos2.c
>>> @@ -400,6 +400,7 @@ static void pegasos2_machine_reset(MachineState *machine, ShutdownCause reason)
>>> machine->fdt = fdt;
>>>
>>> pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine);
>>> + pm->cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(pm->cpu->vhyp);
>>> }
>>>
>>> enum pegasos2_rtas_tokens {
>>> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
>>> index 9bccddb350..63d0094024 100644
>>> --- a/target/ppc/cpu_init.c
>>> +++ b/target/ppc/cpu_init.c
>>> @@ -6631,6 +6631,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp)
>>> CPUPPCState *env = &cpu->env;
>>>
>>> cpu->vhyp = vhyp;
>>> + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp);
>>>
>>> /*
>>> * With a virtual hypervisor mode we never allow the CPU to go
>>> @@ -7224,9 +7225,7 @@ static void ppc_cpu_exec_enter(CPUState *cs)
>>> PowerPCCPU *cpu = POWERPC_CPU(cs);
>>>
>>> if (cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - vhc->cpu_exec_enter(cpu->vhyp, cpu);
>>> + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu);
>>> }
>>> }
>>>
>>> @@ -7235,9 +7234,7 @@ static void ppc_cpu_exec_exit(CPUState *cs)
>>> PowerPCCPU *cpu = POWERPC_CPU(cs);
>>>
>>> if (cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - vhc->cpu_exec_exit(cpu->vhyp, cpu);
>>> + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu);
>>> }
>>> }
>>> #endif /* CONFIG_TCG */
>>> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
>>> index 98952de267..445350488c 100644
>>> --- a/target/ppc/excp_helper.c
>>> +++ b/target/ppc/excp_helper.c
>>> @@ -840,9 +840,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
>>> * HV mode, we need to keep hypercall support.
>>> */
>>> if (lev == 1 && cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - vhc->hypercall(cpu->vhyp, cpu);
>>> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
>>> powerpc_reset_excp_state(cpu);
>>> return;
>>> }
>>> @@ -1012,9 +1010,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
>>> * HV mode, we need to keep hypercall support.
>>> */
>>> if (lev == 1 && cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - vhc->hypercall(cpu->vhyp, cpu);
>>> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
>>> powerpc_reset_excp_state(cpu);
>>> return;
>>> }
>>> @@ -1534,9 +1530,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
>>>
>>> /* "PAPR mode" built-in hypercall emulation */
>>> if (lev == 1 && books_vhyp_handles_hcall(cpu)) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - vhc->hypercall(cpu->vhyp, cpu);
>>> + cpu->vhyp_class->hypercall(cpu->vhyp, cpu);
>>> powerpc_reset_excp_state(cpu);
>>> return;
>>> }
>>> @@ -1677,10 +1671,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
>>> }
>>>
>>> if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */
>>> - vhc->deliver_hv_excp(cpu, excp);
>>> + cpu->vhyp_class->deliver_hv_excp(cpu, excp);
>>>
>>> powerpc_reset_excp_state(cpu);
>>>
>>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>>> index 26fa9d0575..5b5b96ab6b 100644
>>> --- a/target/ppc/kvm.c
>>> +++ b/target/ppc/kvm.c
>>> @@ -862,9 +862,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu)
>>> sregs.pvr = env->spr[SPR_PVR];
>>>
>>> if (cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
>>> + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp);
>>> } else {
>>> sregs.u.s.sdr1 = env->spr[SPR_SDR1];
>>> }
>>> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
>>> index d645c0bb94..196b4b2a48 100644
>>> --- a/target/ppc/mmu-hash64.c
>>> +++ b/target/ppc/mmu-hash64.c
>>> @@ -516,9 +516,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu,
>>> const ppc_hash_pte64_t *hptes;
>>>
>>> if (cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - return vhc->map_hptes(cpu->vhyp, ptex, n);
>>> + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n);
>>> }
>>> base = ppc_hash64_hpt_base(cpu);
>>>
>>> @@ -538,9 +536,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes,
>>> hwaddr ptex, int n)
>>> {
>>> if (cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n);
>>> + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n);
>>> return;
>>> }
>>>
>>> @@ -820,9 +816,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
>>> hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R;
>>>
>>> if (cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - vhc->hpte_set_r(cpu->vhyp, ptex, pte1);
>>> + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1);
>>> return;
>>> }
>>> base = ppc_hash64_hpt_base(cpu);
>>> @@ -837,9 +831,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1)
>>> hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C;
>>>
>>> if (cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc =
>>> - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - vhc->hpte_set_c(cpu->vhyp, ptex, pte1);
>>> + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1);
>>> return;
>>> }
>>> base = ppc_hash64_hpt_base(cpu);
>>> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
>>> index 5823e039e6..496ba87a95 100644
>>> --- a/target/ppc/mmu-radix64.c
>>> +++ b/target/ppc/mmu-radix64.c
>>> @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
>>>
>>> /* Get Partition Table */
>>> if (cpu->vhyp) {
>>> - PPCVirtualHypervisorClass *vhc;
>>> - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
>>> - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
>>> + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
>>> if (guest_visible) {
>>> ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr,
>>> DSISR_R_BADCONFIG);
>
>
* Re: [PATCH] spapr: avoid overhead of finding vhyp class in critical operations
2024-03-12 8:56 ` Harsh Prateek Bora
@ 2024-03-12 10:40 ` Nicholas Piggin
From: Nicholas Piggin @ 2024-03-12 10:40 UTC (permalink / raw)
To: Harsh Prateek Bora, qemu-ppc
Cc: qemu-devel, BALATON Zoltan, David Gibson, Daniel Henrique Barboza
On Tue Mar 12, 2024 at 6:56 PM AEST, Harsh Prateek Bora wrote:
>
>
> On 3/12/24 14:18, Nicholas Piggin wrote:
> > On Tue Mar 12, 2024 at 4:38 PM AEST, Harsh Prateek Bora wrote:
> >> Hi Nick,
> >>
> >> One minor comment below:
> >>
> >> On 2/24/24 13:03, Nicholas Piggin wrote:
> >>> PPC_VIRTUAL_HYPERVISOR_GET_CLASS is used in critical operations like
> >>> interrupts and TLB misses and is quite costly. Running the
> >>> kvm-unit-tests sieve program with radix MMU enabled thrashes the TCG
> >>> TLB and spends a lot of time in TLB and page table walking code. The
> >>> test takes 67 seconds to complete with a lot of time being spent in
> >>> code related to finding the vhyp class:
> >>>
> >>> 12.01% [.] g_str_hash
> >>> 8.94% [.] g_hash_table_lookup
> >>> 8.06% [.] object_class_dynamic_cast
> >>> 6.21% [.] address_space_ldq
> >>> 4.94% [.] __strcmp_avx2
> >>> 4.28% [.] tlb_set_page_full
> >>> 4.08% [.] address_space_translate_internal
> >>> 3.17% [.] object_class_dynamic_cast_assert
> >>> 2.84% [.] ppc_radix64_xlate
> >>>
> >>> Keep a pointer to the class and avoid this lookup. This reduces the
> >>> execution time to 40 seconds.
> >>>
> >>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> >>> ---
> >>> This feels a bit ugly, but the performance problem of looking up the
> >>> class in fast paths can't be ignored. Is there a "nicer" way to get the
> >>> same result?
> >>>
> >>> Thanks,
> >>> Nick
> >>>
> >>> target/ppc/cpu.h | 3 ++-
> >>> target/ppc/mmu-book3s-v3.h | 4 +---
> >>> hw/ppc/pegasos2.c | 1 +
> >>> target/ppc/cpu_init.c | 9 +++------
> >>> target/ppc/excp_helper.c | 16 ++++------------
> >>> target/ppc/kvm.c | 4 +---
> >>> target/ppc/mmu-hash64.c | 16 ++++------------
> >>> target/ppc/mmu-radix64.c | 4 +---
> >>> 8 files changed, 17 insertions(+), 40 deletions(-)
> >>>
> >>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> >>> index ec14574d14..eb85d9aa71 100644
> >>> --- a/target/ppc/cpu.h
> >>> +++ b/target/ppc/cpu.h
> >>> @@ -1437,6 +1437,7 @@ struct ArchCPU {
> >>> int vcpu_id;
> >>> uint32_t compat_pvr;
> >>> PPCVirtualHypervisor *vhyp;
> >>> + PPCVirtualHypervisorClass *vhyp_class;
> >>> void *machine_data;
> >>> int32_t node_id; /* NUMA node this CPU belongs to */
> >>> PPCHash64Options *hash64_opts;
> >>> @@ -1535,7 +1536,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass,
> >>>
> >>> static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu)
> >>> {
> >>> - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu);
> >>> + return cpu->vhyp_class->cpu_in_nested(cpu);
> >>> }
> >>> #endif /* CONFIG_USER_ONLY */
> >>>
> >>> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> >>> index 674377a19e..f3f7993958 100644
> >>> --- a/target/ppc/mmu-book3s-v3.h
> >>> +++ b/target/ppc/mmu-book3s-v3.h
> >>> @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
> >>> uint64_t base;
> >>>
> >>> if (cpu->vhyp) {
> >>
> >> All the checks for cpu->vhyp need to be changed to check for
> >> cpu->vhyp_class now, for all such instances.
> >
> > It wasn't supposed to, because vhyp != NULL implies vhyp_class != NULL.
> > It's supposed to be an equivalent transformation just changing the
> > lookup function.
>
> I agree, but not just it appears a bit odd, my only worry is if a future
> change cause vhyp_class to be NULL before the control reaches here, this
> check wont really serve the purpose. Anyways, not a mandatory
> requirement for now, so I shall leave it to your choice.
It does look like it should be the other way around when read without the
context of the s/PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)/cpu->vhyp_class/
substitution.
We'll never have vhyp != NULL && vhyp_class == NULL though, so it
should be okay.
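For illustration, one way the invariant could be made explicit if that ever
becomes a concern (a minimal sketch; the helper below is hypothetical and not
part of this patch):

  /* Hypothetical guard, not in the patch: the checks that gate the fast
   * paths could go through a helper that asserts vhyp and vhyp_class are
   * always set (or cleared) together, so a future change breaking the
   * invariant would trip immediately. */
  static inline bool ppc_cpu_has_vhyp(PowerPCCPU *cpu)
  {
      g_assert(!cpu->vhyp == !cpu->vhyp_class);
      return cpu->vhyp != NULL;
  }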
Thanks,
Nick