* [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl
@ 2022-05-18 13:39 Nicholas Piggin
2022-05-18 13:39 ` [PATCH 2/4] powerpc/pseries: Add wait interval counters to VPA Nicholas Piggin
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Nicholas Piggin @ 2022-05-18 13:39 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
Stolen time logging in dtl was removed from the P9 path, so guests had
no stolen time accounting. Add it back in a simpler way that still
avoids locks and per-core accounting code.
Fixes: ecb6a7207f92 ("KVM: PPC: Book3S HV P9: Remove most of the vcore logic")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 49 +++++++++++++++++++++++++++++++++---
1 file changed, 45 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6fa518f6501d..0a0835edb64a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -248,6 +248,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
/*
* We use the vcpu_load/put functions to measure stolen time.
+ *
* Stolen time is counted as time when either the vcpu is able to
* run as part of a virtual core, but the task running the vcore
* is preempted or sleeping, or when the vcpu needs something done
@@ -277,6 +278,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
* lock. The stolen times are measured in units of timebase ticks.
* (Note that the != TB_NIL checks below are purely defensive;
* they should never fail.)
+ *
+ * The POWER9 path is simpler, one vcpu per virtual core so the
+ * former case does not exist. If a vcpu is preempted when it is
+ * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate
+ * the stolen cycles in busy_stolen. RUNNING is not a preemptible
+ * state in the P9 path.
*/
static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
@@ -310,8 +317,14 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
unsigned long flags;
u64 now;
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (vcpu->arch.busy_preempt != TB_NIL) {
+ WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST);
+ vc->stolen_tb += mftb() - vcpu->arch.busy_preempt;
+ vcpu->arch.busy_preempt = TB_NIL;
+ }
return;
+ }
now = mftb();
@@ -339,8 +352,21 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
unsigned long flags;
u64 now;
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * In the P9 path, RUNNABLE is not preemptible
+ * (nor takes host interrupts)
+ */
+ WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE);
+ /*
+ * Account stolen time when preempted while the vcpu task is
+ * running in the kernel (but not in qemu, which is INACTIVE).
+ */
+ if (task_is_running(current) &&
+ vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+ vcpu->arch.busy_preempt = mftb();
return;
+ }
now = mftb();
@@ -739,6 +765,18 @@ static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.dtl.dirty = true;
}
+static void kvmppc_create_dtl_entry_p9(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc,
+ u64 now)
+{
+ unsigned long stolen;
+
+ stolen = vc->stolen_tb - vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = vc->stolen_tb;
+
+ __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now, stolen);
+}
+
static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
struct kvmppc_vcore *vc)
{
@@ -4470,7 +4508,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc = vcpu->arch.vcore;
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
- vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
/* See if the MMU is ready to go */
@@ -4497,6 +4534,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
/* flags save not required, but irq_pmu has no disable/enable API */
powerpc_local_irq_pmu_save(flags);
+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+
if (signal_pending(current))
goto sigpend;
if (need_resched() || !kvm->arch.mmu_ready)
@@ -4536,7 +4575,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
tb = mftb();
- __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
+ kvmppc_create_dtl_entry_p9(vcpu, vc, tb + vc->tb_offset);
trace_kvm_guest_enter(vcpu);
@@ -4577,6 +4616,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
powerpc_local_irq_pmu_restore(flags);
@@ -4639,6 +4679,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
out:
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
powerpc_local_irq_pmu_restore(flags);
preempt_enable();
goto done;
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/4] powerpc/pseries: Add wait interval counters to VPA
2022-05-18 13:39 [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl Nicholas Piggin
@ 2022-05-18 13:39 ` Nicholas Piggin
2022-05-27 20:25 ` Fabiano Rosas
2022-05-18 13:39 ` [PATCH 3/4] KVM: PPC: Book3S HV: Implement scheduling wait interval counters in the VPA Nicholas Piggin
` (2 subsequent siblings)
3 siblings, 1 reply; 9+ messages in thread
From: Nicholas Piggin @ 2022-05-18 13:39 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
The hypervisor exposes accumulated partition scheduling interval times
in the VPA (lppaca). These can be used to implement a simple stolen time
in the guest without complex and costly dtl scanning.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/lppaca.h | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index c390ec377bae..34d44cb17c87 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -104,14 +104,18 @@ struct lppaca {
volatile __be32 dispersion_count; /* dispatch changed physical cpu */
volatile __be64 cmo_faults; /* CMO page fault count */
volatile __be64 cmo_fault_time; /* CMO page fault time */
- u8 reserved10[104];
+ u8 reserved10[64]; /* [S]PURR expropriated/donated */
+ volatile __be64 enqueue_dispatch_tb; /* Total TB enqueue->dispatch */
+ volatile __be64 ready_enqueue_tb; /* Total TB ready->enqueue */
+ volatile __be64 wait_ready_tb; /* Total TB wait->ready */
+ u8 reserved11[16];
/* cacheline 4-5 */
__be32 page_ins; /* CMO Hint - # page ins by OS */
- u8 reserved11[148];
+ u8 reserved12[148];
volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
- u8 reserved12[96];
+ u8 reserved13[96];
} ____cacheline_aligned;
#define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/4] KVM: PPC: Book3S HV: Implement scheduling wait interval counters in the VPA
2022-05-18 13:39 [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl Nicholas Piggin
2022-05-18 13:39 ` [PATCH 2/4] powerpc/pseries: Add wait interval counters to VPA Nicholas Piggin
@ 2022-05-18 13:39 ` Nicholas Piggin
2022-05-27 20:25 ` Fabiano Rosas
2022-05-18 13:39 ` [PATCH 4/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING Nicholas Piggin
2022-05-27 18:45 ` [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl Fabiano Rosas
3 siblings, 1 reply; 9+ messages in thread
From: Nicholas Piggin @ 2022-05-18 13:39 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
PAPR specifies accumulated virtual processor wait intervals that relate
to partition scheduling interval times. Implement these counters in the
same way as they are reported by dtl.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 62 ++++++++++++++++++++++++------------
1 file changed, 41 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0a0835edb64a..9f8795d2b0c3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -732,16 +732,15 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
}
static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ struct lppaca *vpa,
unsigned int pcpu, u64 now,
unsigned long stolen)
{
struct dtl_entry *dt;
- struct lppaca *vpa;
dt = vcpu->arch.dtl_ptr;
- vpa = vcpu->arch.vpa.pinned_addr;
- if (!dt || !vpa)
+ if (!dt)
return;
dt->dispatch_reason = 7;
@@ -762,29 +761,23 @@ static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
/* order writing *dt vs. writing vpa->dtl_idx */
smp_wmb();
vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
- vcpu->arch.dtl.dirty = true;
-}
-
-static void kvmppc_create_dtl_entry_p9(struct kvm_vcpu *vcpu,
- struct kvmppc_vcore *vc,
- u64 now)
-{
- unsigned long stolen;
- stolen = vc->stolen_tb - vcpu->arch.stolen_logged;
- vcpu->arch.stolen_logged = vc->stolen_tb;
-
- __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now, stolen);
+ /* vcpu->arch.dtl.dirty is set by the caller */
}
-static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
- struct kvmppc_vcore *vc)
+static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc)
{
+ struct lppaca *vpa;
unsigned long stolen;
unsigned long core_stolen;
u64 now;
unsigned long flags;
+ vpa = vcpu->arch.vpa.pinned_addr;
+ if (!vpa)
+ return;
+
now = mftb();
core_stolen = vcore_stolen_time(vc, now);
@@ -795,7 +788,34 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.busy_stolen = 0;
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
- __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen);
+ vpa->enqueue_dispatch_tb = cpu_to_be64(be64_to_cpu(vpa->enqueue_dispatch_tb) + stolen);
+
+ __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + vc->tb_offset, stolen);
+
+ vcpu->arch.vpa.dirty = true;
+}
+
+static void kvmppc_update_vpa_dispatch_p9(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc,
+ u64 now)
+{
+ struct lppaca *vpa;
+ unsigned long stolen;
+ unsigned long stolen_delta;
+
+ vpa = vcpu->arch.vpa.pinned_addr;
+ if (!vpa)
+ return;
+
+ stolen = vc->stolen_tb;
+ stolen_delta = stolen - vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = stolen;
+
+ vpa->enqueue_dispatch_tb = cpu_to_be64(stolen);
+
+ __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now, stolen_delta);
+
+ vcpu->arch.vpa.dirty = true;
}
/* See if there is a doorbell interrupt pending for a vcpu */
@@ -3820,7 +3840,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* kvmppc_core_prepare_to_enter.
*/
kvmppc_start_thread(vcpu, pvc);
- kvmppc_create_dtl_entry(vcpu, pvc);
+ kvmppc_update_vpa_dispatch(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
if (!vcpu->arch.ptid)
thr0_done = true;
@@ -4392,7 +4412,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
if ((vc->vcore_state == VCORE_PIGGYBACK ||
vc->vcore_state == VCORE_RUNNING) &&
!VCORE_IS_EXITING(vc)) {
- kvmppc_create_dtl_entry(vcpu, vc);
+ kvmppc_update_vpa_dispatch(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
@@ -4575,7 +4595,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
tb = mftb();
- kvmppc_create_dtl_entry_p9(vcpu, vc, tb + vc->tb_offset);
+ kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + vc->tb_offset);
trace_kvm_guest_enter(vcpu);
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING
2022-05-18 13:39 [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl Nicholas Piggin
2022-05-18 13:39 ` [PATCH 2/4] powerpc/pseries: Add wait interval counters to VPA Nicholas Piggin
2022-05-18 13:39 ` [PATCH 3/4] KVM: PPC: Book3S HV: Implement scheduling wait interval counters in the VPA Nicholas Piggin
@ 2022-05-18 13:39 ` Nicholas Piggin
2022-05-27 20:47 ` Fabiano Rosas
2022-06-03 10:57 ` shrikanth suresh hegde
2022-05-27 18:45 ` [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl Fabiano Rosas
3 siblings, 2 replies; 9+ messages in thread
From: Nicholas Piggin @ 2022-05-18 13:39 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Nicholas Piggin
CONFIG_VIRT_CPU_ACCOUNTING_GEN under pseries does not implement
stolen time accounting. Implement it with the paravirt time
accounting option.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
.../admin-guide/kernel-parameters.txt | 6 +++---
arch/powerpc/include/asm/paravirt.h | 12 ++++++++++++
arch/powerpc/platforms/pseries/Kconfig | 8 ++++++++
arch/powerpc/platforms/pseries/lpar.c | 11 +++++++++++
arch/powerpc/platforms/pseries/setup.c | 19 +++++++++++++++++++
5 files changed, 53 insertions(+), 3 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 3f1cc5e317ed..855fc7b02261 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3604,9 +3604,9 @@
[X86,PV_OPS] Disable paravirtualized VMware scheduler
clock and use the default one.
- no-steal-acc [X86,PV_OPS,ARM64] Disable paravirtualized steal time
- accounting. steal time is computed, but won't
- influence scheduler behaviour
+ no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized
+ steal time accounting. steal time is computed, but
+ won't influence scheduler behaviour
nolapic [X86-32,APIC] Do not enable or use the local APIC.
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index eb7df559ae74..f5ba1a3c41f8 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -21,6 +21,18 @@ static inline bool is_shared_processor(void)
return static_branch_unlikely(&shared_processor);
}
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 pseries_paravirt_steal_clock(int cpu);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return pseries_paravirt_steal_clock(cpu);
+}
+#endif
+
/* If bit 0 is set, the cpu has been ceded, conferred, or preempted */
static inline u32 yield_count_of(int cpu)
{
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index f7fd91d153a4..d4306ebdca5e 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -24,13 +24,21 @@ config PPC_PSERIES
select SWIOTLB
default y
+config PARAVIRT
+ bool
+
config PARAVIRT_SPINLOCKS
bool
+config PARAVIRT_TIME_ACCOUNTING
+ select PARAVIRT
+ bool
+
config PPC_SPLPAR
bool "Support for shared-processor logical partitions"
depends on PPC_PSERIES
select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+ select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN
default y
help
Enabling this option will make the kernel run more efficiently
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 760581c5752f..1965b7d7d8f1 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -661,6 +661,17 @@ static int __init vcpudispatch_stats_procfs_init(void)
}
machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+u64 pseries_paravirt_steal_clock(int cpu)
+{
+ struct lppaca *lppaca = &lppaca_of(cpu);
+
+ return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+ be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+}
+#endif
+
#endif /* CONFIG_PPC_SPLPAR */
void vpa_init(int cpu)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 955ff8aa1644..691c9add4a5a 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -78,6 +78,20 @@
DEFINE_STATIC_KEY_FALSE(shared_processor);
EXPORT_SYMBOL(shared_processor);
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+#endif
+
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -831,6 +845,11 @@ static void __init pSeries_setup_arch(void)
if (lppaca_shared_proc(get_lppaca())) {
static_branch_enable(&shared_processor);
pv_spinlocks_init();
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+#endif
}
ppc_md.power_save = pseries_lpar_idle;
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl
2022-05-18 13:39 [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl Nicholas Piggin
` (2 preceding siblings ...)
2022-05-18 13:39 ` [PATCH 4/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING Nicholas Piggin
@ 2022-05-27 18:45 ` Fabiano Rosas
3 siblings, 0 replies; 9+ messages in thread
From: Fabiano Rosas @ 2022-05-27 18:45 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin
Nicholas Piggin <npiggin@gmail.com> writes:
> Stolen time logging in dtl was removed from the P9 path, so guests had
> no stolen time accounting. Add it back in a simpler way that still
> avoids locks and per-core accounting code.
>
> Fixes: ecb6a7207f92 ("KVM: PPC: Book3S HV P9: Remove most of the vcore logic")
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/kvm/book3s_hv.c | 49 +++++++++++++++++++++++++++++++++---
> 1 file changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 6fa518f6501d..0a0835edb64a 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -248,6 +248,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
>
> /*
> * We use the vcpu_load/put functions to measure stolen time.
> + *
> * Stolen time is counted as time when either the vcpu is able to
> * run as part of a virtual core, but the task running the vcore
> * is preempted or sleeping, or when the vcpu needs something done
> @@ -277,6 +278,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
> * lock. The stolen times are measured in units of timebase ticks.
> * (Note that the != TB_NIL checks below are purely defensive;
> * they should never fail.)
> + *
> + * The POWER9 path is simpler, one vcpu per virtual core so the
> + * former case does not exist. If a vcpu is preempted when it is
> + * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate
> + * the stolen cycles in busy_stolen. RUNNING is not a preemptible
> + * state in the P9 path.
Do you mean RUNNABLE? The only RUNNING state I see is in relation to the
vcore.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] powerpc/pseries: Add wait interval counters to VPA
2022-05-18 13:39 ` [PATCH 2/4] powerpc/pseries: Add wait interval counters to VPA Nicholas Piggin
@ 2022-05-27 20:25 ` Fabiano Rosas
0 siblings, 0 replies; 9+ messages in thread
From: Fabiano Rosas @ 2022-05-27 20:25 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin
Nicholas Piggin <npiggin@gmail.com> writes:
> The hypervisor exposes accumulated partition scheduling interval times
> in the VPA (lppaca). These can be used to implement a simple stolen time
> in the guest without complex and costly dtl scanning.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/include/asm/lppaca.h | 10 +++++++---
> 1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
> index c390ec377bae..34d44cb17c87 100644
> --- a/arch/powerpc/include/asm/lppaca.h
> +++ b/arch/powerpc/include/asm/lppaca.h
> @@ -104,14 +104,18 @@ struct lppaca {
> volatile __be32 dispersion_count; /* dispatch changed physical cpu */
> volatile __be64 cmo_faults; /* CMO page fault count */
> volatile __be64 cmo_fault_time; /* CMO page fault time */
> - u8 reserved10[104];
> + u8 reserved10[64]; /* [S]PURR expropriated/donated */
> + volatile __be64 enqueue_dispatch_tb; /* Total TB enqueue->dispatch */
> + volatile __be64 ready_enqueue_tb; /* Total TB ready->enqueue */
> + volatile __be64 wait_ready_tb; /* Total TB wait->ready */
This last one is unused, but I assume you are adding it anyway because it
could later be added to lparcfg. So:
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/4] KVM: PPC: Book3S HV: Implement scheduling wait interval counters in the VPA
2022-05-18 13:39 ` [PATCH 3/4] KVM: PPC: Book3S HV: Implement scheduling wait interval counters in the VPA Nicholas Piggin
@ 2022-05-27 20:25 ` Fabiano Rosas
0 siblings, 0 replies; 9+ messages in thread
From: Fabiano Rosas @ 2022-05-27 20:25 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin
Nicholas Piggin <npiggin@gmail.com> writes:
> PAPR specifies accumulated virtual processor wait intervals that relate
> to partition scheduling interval times. Implement these counters in the
> same way as they are reported by dtl.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING
2022-05-18 13:39 ` [PATCH 4/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING Nicholas Piggin
@ 2022-05-27 20:47 ` Fabiano Rosas
2022-06-03 10:57 ` shrikanth suresh hegde
1 sibling, 0 replies; 9+ messages in thread
From: Fabiano Rosas @ 2022-05-27 20:47 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev; +Cc: Nicholas Piggin
Nicholas Piggin <npiggin@gmail.com> writes:
> CONFIG_VIRT_CPU_ACCOUNTING_GEN under pseries does not implement
> stolen time accounting. Implement it with the paravirt time
> accounting option.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> .../admin-guide/kernel-parameters.txt | 6 +++---
> arch/powerpc/include/asm/paravirt.h | 12 ++++++++++++
> arch/powerpc/platforms/pseries/Kconfig | 8 ++++++++
> arch/powerpc/platforms/pseries/lpar.c | 11 +++++++++++
> arch/powerpc/platforms/pseries/setup.c | 19 +++++++++++++++++++
> 5 files changed, 53 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 3f1cc5e317ed..855fc7b02261 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -3604,9 +3604,9 @@
> [X86,PV_OPS] Disable paravirtualized VMware scheduler
> clock and use the default one.
>
> - no-steal-acc [X86,PV_OPS,ARM64] Disable paravirtualized steal time
> - accounting. steal time is computed, but won't
> - influence scheduler behaviour
> + no-steal-acc [X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized
> + steal time accounting. steal time is computed, but
> + won't influence scheduler behaviour
>
> nolapic [X86-32,APIC] Do not enable or use the local APIC.
>
> diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
> index eb7df559ae74..f5ba1a3c41f8 100644
> --- a/arch/powerpc/include/asm/paravirt.h
> +++ b/arch/powerpc/include/asm/paravirt.h
> @@ -21,6 +21,18 @@ static inline bool is_shared_processor(void)
> return static_branch_unlikely(&shared_processor);
> }
>
> +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
> +extern struct static_key paravirt_steal_enabled;
> +extern struct static_key paravirt_steal_rq_enabled;
> +
> +u64 pseries_paravirt_steal_clock(int cpu);
> +
> +static inline u64 paravirt_steal_clock(int cpu)
> +{
> + return pseries_paravirt_steal_clock(cpu);
> +}
> +#endif
> +
> /* If bit 0 is set, the cpu has been ceded, conferred, or preempted */
> static inline u32 yield_count_of(int cpu)
> {
> diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
> index f7fd91d153a4..d4306ebdca5e 100644
> --- a/arch/powerpc/platforms/pseries/Kconfig
> +++ b/arch/powerpc/platforms/pseries/Kconfig
> @@ -24,13 +24,21 @@ config PPC_PSERIES
> select SWIOTLB
> default y
>
> +config PARAVIRT
> + bool
> +
In file included from ../kernel/sched/build_utility.c:53:
../kernel/sched/sched.h:87:11: fatal error: asm/paravirt_api_clock.h: No such file or directory
87 | # include <asm/paravirt_api_clock.h>
$ find . -name paravirt_api_clock.h
./arch/arm64/include/asm/paravirt_api_clock.h
./arch/x86/include/asm/paravirt_api_clock.h
./arch/arm/include/asm/paravirt_api_clock.h
> config PARAVIRT_SPINLOCKS
> bool
>
> +config PARAVIRT_TIME_ACCOUNTING
> + select PARAVIRT
> + bool
> +
> config PPC_SPLPAR
> bool "Support for shared-processor logical partitions"
> depends on PPC_PSERIES
> select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
> + select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN
> default y
> help
> Enabling this option will make the kernel run more efficiently
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 4/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING
2022-05-18 13:39 ` [PATCH 4/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING Nicholas Piggin
2022-05-27 20:47 ` Fabiano Rosas
@ 2022-06-03 10:57 ` shrikanth suresh hegde
1 sibling, 0 replies; 9+ messages in thread
From: shrikanth suresh hegde @ 2022-06-03 10:57 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev
On 5/18/22 7:09 PM, Nicholas Piggin wrote:
> CONFIG_VIRT_CPU_ACCOUNTING_GEN under pseries does not implement
> stolen time accounting. Implement it with the paravirt time
> accounting option.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
The patch fails to compile with CONFIG_PARAVIRT=y, giving the error below.
In file included from kernel/sched/core.c:81:
kernel/sched/sched.h:87:11: fatal error: asm/paravirt_api_clock.h: No
such file or directory
87 | # include <asm/paravirt_api_clock.h>
compilation terminated.
After adding the file, it compiled. Please add the file as well. This is the patch I applied:
diff --git a/arch/powerpc/include/asm/paravirt_api_clock.h
b/arch/powerpc/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
After successful compilation, it was tested on a Power10 shared LPAR. The system
has two LPARs; we will call the first one LPAR1 and the second one LPAR2. The test
was carried out with SMT=1. Similar observations were made with SMT=8 as well.
The LPAR config header from each LPAR is below. LPAR1 is twice as big as LPAR2.
Since both share the same underlying hardware, work stealing will happen
when both LPARs are contending for the same resource.
LPAR1:
type=Shared mode=Uncapped smt=Off lcpu=40 mem=2094637056 kB cpus=40 ent=20.00
LPAR2:
type=Shared mode=Uncapped smt=Off lcpu=20 mem=2083908608 kB cpus=40 ent=10.00
mpstat was used to check the utilization, and stress-ng was used as the
workload. A few cases were tested. When both LPARs are idle, there is no steal
time. When LPAR1 starts running at 100%, which consumes all of the physical
resource, steal time starts to get accounted. With LPAR1 running at 100%, once
LPAR2 starts running, steal time starts increasing. This is as expected. When
the LPAR2 load is increased further, steal time increases further.
Case 1: 0% LPAR1; 0% LPAR2
CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
all 0.00 0.00 0.05 0.00 0.00 0.00 0.00 0.00 0.00 99.95
Case 2: 100% LPAR1; 0% LPAR2
CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
all 97.68 0.00 0.00 0.00 0.00 0.00 2.32 0.00 0.00 0.00
Case 3: 100% LPAR1; 50% LPAR2
CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
all 86.34 0.00 0.10 0.00 0.00 0.03 13.54 0.00 0.00 0.00
Case 4: 100% LPAR1; 100% LPAR2
CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
all 78.54 0.00 0.07 0.00 0.00 0.02 21.36 0.00 0.00 0.00
Case 5: 50% LPAR1; 100% LPAR2
CPU %usr %nice %sys %iowait %irq %soft %steal %guest %gnice %idle
all 49.37 0.00 0.00 0.00 0.00 0.00 1.17 0.00 0.00 49.47
The patch accounts for steal time, and the basic tests hold up.
-- Shrikanth Hegde
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-06-03 10:58 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-05-18 13:39 [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl Nicholas Piggin
2022-05-18 13:39 ` [PATCH 2/4] powerpc/pseries: Add wait interval counters to VPA Nicholas Piggin
2022-05-27 20:25 ` Fabiano Rosas
2022-05-18 13:39 ` [PATCH 3/4] KVM: PPC: Book3S HV: Implement scheduling wait interval counters in the VPA Nicholas Piggin
2022-05-27 20:25 ` Fabiano Rosas
2022-05-18 13:39 ` [PATCH 4/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING Nicholas Piggin
2022-05-27 20:47 ` Fabiano Rosas
2022-06-03 10:57 ` shrikanth suresh hegde
2022-05-27 18:45 ` [PATCH 1/4] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl Fabiano Rosas
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).