* [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths
@ 2026-05-21 9:16 zhanghao
2026-05-21 14:05 ` Sean Christopherson
0 siblings, 1 reply; 4+ messages in thread
From: zhanghao @ 2026-05-21 9:16 UTC (permalink / raw)
To: Sean Christopherson; +Cc: Paolo Bonzini, kvm
From 0c8d4428390a1238a956f713c1ddced18eac83da Mon Sep 17 00:00:00 2001
From: Hao Zhang <zhanghao1@kylinos.cn>
Date: Thu, 21 May 2026 16:30:37 +0800
Add a KVM x86 tracepoint that reports when a vCPU reaches selected
wait/yield handling paths in KVM.
The tracepoint is informational only. It does not change KVM scheduling
behavior, vCPU placement, directed-yield policy, or host scheduler state.
It also does not report the outcome of scheduling decisions.
In particular, the target field is the guest-supplied value when one
exists. It does not imply that the target exists, is runnable, or that a
directed yield succeeded.
Report events for PAUSE-loop exiting after hardware has already delivered
a PLE exit, guest HLT emulation, and paravirtual yield-style paths,
including KVM_HC_SCHED_YIELD, Hyper-V long spin wait, and Xen sched yield.
Use -1 as the target when there is no explicit guest-supplied target.
Signed-off-by: Hao Zhang <zhanghao1@kylinos.cn>
---
arch/x86/kvm/hyperv.c | 2 ++
arch/x86/kvm/svm/svm.c | 2 ++
arch/x86/kvm/trace.h | 46 ++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/vmx.c | 2 ++
arch/x86/kvm/x86.c | 6 ++++++
arch/x86/kvm/xen.c | 2 ++
6 files changed, 60 insertions(+)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4438ecac9a89..7f3b4698083f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2593,6 +2593,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PV_YIELD,
+ KVM_SCHED_INVALID_TARGET);
kvm_vcpu_on_spin(vcpu, true);
break;
case HVCALL_SIGNAL_EVENT:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e02a38da5296..a3390a051ab2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3199,6 +3199,8 @@ static int pause_interception(struct kvm_vcpu *vcpu)
grow_ple_window(vcpu);
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PLE,
+ KVM_SCHED_INVALID_TARGET);
kvm_vcpu_on_spin(vcpu, in_kernel);
return kvm_skip_emulated_instruction(vcpu);
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0db25bba17f6..83b291de9665 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -11,6 +11,17 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
+#ifndef _KVM_SCHED_EVENT_TYPES
+#define _KVM_SCHED_EVENT_TYPES
+enum kvm_sched_event_type { /* wait/yield path reported via kvm_sched_event */
+ KVM_SCHED_EVT_PLE, /* PAUSE exit handling (VMX and SVM) */
+ KVM_SCHED_EVT_HLT, /* guest HLT emulation */
+ KVM_SCHED_EVT_PV_YIELD, /* PV yield paths: KVM, Hyper-V, Xen */
+};
+
+#define KVM_SCHED_INVALID_TARGET (-1LL) /* no guest-supplied target */
+#endif /* _KVM_SCHED_EVENT_TYPES */
+
#ifdef CREATE_TRACE_POINTS
#define tracing_kvm_rip_read(vcpu) ({ \
typeof(vcpu) __vcpu = vcpu; \
@@ -1965,6 +1976,41 @@ TRACE_EVENT(kvm_rmp_fault,
__entry->error_code, __entry->rmp_level, __entry->psmash_ret)
);
+TRACE_DEFINE_ENUM(KVM_SCHED_EVT_PLE);
+TRACE_DEFINE_ENUM(KVM_SCHED_EVT_HLT);
+TRACE_DEFINE_ENUM(KVM_SCHED_EVT_PV_YIELD);
+
+/*
+ * Trace when a vCPU reaches selected wait/yield handling paths. Informational
+ * only: the outcome of scheduling decisions is not reported. @target is the
+ * guest-supplied value if one exists (KVM_SCHED_INVALID_TARGET otherwise); it
+ * does not imply the target exists, is runnable, or that a yield succeeded.
+ */
+TRACE_EVENT(kvm_sched_event,
+ TP_PROTO(unsigned int vcpu_id, int event, s64 target),
+ TP_ARGS(vcpu_id, event, target),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id) /* vcpu->vcpu_id at every call site */
+ __field(int, event) /* enum kvm_sched_event_type */
+ __field(s64, target) /* KVM_SCHED_INVALID_TARGET if none */
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->event = event;
+ __entry->target = target;
+ ),
+
+ TP_printk("vcpu %u event %s target %lld",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->event, /* print the event by name */
+ { KVM_SCHED_EVT_PLE, "ple" },
+ { KVM_SCHED_EVT_HLT, "hlt" },
+ { KVM_SCHED_EVT_PV_YIELD, "pv_yield" }),
+ __entry->target)
+);
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 49feecb286b2..d60026c64fc9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6104,6 +6104,8 @@ static int handle_pause(struct kvm_vcpu *vcpu)
* never set PAUSE_EXITING and just set PLE if supported,
* so the vcpu must be CPL=0 if it gets a PAUSE exit.
*/
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PLE,
+ KVM_SCHED_INVALID_TARGET);
kvm_vcpu_on_spin(vcpu, true);
return kvm_skip_emulated_instruction(vcpu);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a1b63c63d1a..09d1d8242017 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10482,6 +10482,7 @@ int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, int cpl,
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
break;
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PV_YIELD, a0);
kvm_sched_yield(vcpu, a0);
ret = 0;
break;
@@ -11807,6 +11808,10 @@ static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
* handling wake events.
*/
++vcpu->stat.halt_exits;
+
+ if (reason == KVM_EXIT_HLT)
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_HLT,
+ KVM_SCHED_INVALID_TARGET);
if (lapic_in_kernel(vcpu)) {
if (kvm_vcpu_has_events(vcpu) || vcpu->arch.pv.pv_unhalted)
state = KVM_MP_STATE_RUNNABLE;
@@ -14553,6 +14558,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_rmp_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_sched_event);
static int __init kvm_x86_init(void)
{
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 91fd3673c09a..f7033f0e7e13 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1586,6 +1586,8 @@ static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
return true;
fallthrough;
case SCHEDOP_yield:
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PV_YIELD,
+ KVM_SCHED_INVALID_TARGET);
kvm_vcpu_on_spin(vcpu, true);
*r = 0;
return true;
base-commit: 8bc67e4db64aa72732c474b44ea8622062c903f0
--
2.25.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths
2026-05-21 9:16 [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths zhanghao
@ 2026-05-21 14:05 ` Sean Christopherson
2026-05-22 2:49 ` zhanghao
0 siblings, 1 reply; 4+ messages in thread
From: Sean Christopherson @ 2026-05-21 14:05 UTC (permalink / raw)
To: zhanghao; +Cc: Paolo Bonzini, kvm
On Thu, May 21, 2026, zhanghao wrote:
> >From 0c8d4428390a1238a956f713c1ddced18eac83da Mon Sep 17 00:00:00 2001
> From: Hao Zhang <zhanghao1@kylinos.cn>
> Date: Thu, 21 May 2026 16:30:37 +0800
>
> Add a KVM x86 tracepoint that reports when a vCPU reaches selected
> wait/yield handling paths in KVM.
Why?
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths
2026-05-21 14:05 ` Sean Christopherson
@ 2026-05-22 2:49 ` zhanghao
2026-06-18 16:14 ` Sean Christopherson
0 siblings, 1 reply; 4+ messages in thread
From: zhanghao @ 2026-05-22 2:49 UTC (permalink / raw)
To: Sean Christopherson; +Cc: Paolo Bonzini, kvm
On Thu, May 21, 2026, Sean Christopherson wrote:
> On Thu, May 21, 2026, zhanghao wrote:
> > >From 0c8d4428390a1238a956f713c1ddced18eac83da Mon Sep 17 00:00:00 2001
> > From: Hao Zhang <zhanghao1@kylinos.cn>
> > Date: Thu, 21 May 2026 16:30:37 +0800
> >
> > Add a KVM x86 tracepoint that reports when a vCPU reaches selected
> > wait/yield handling paths in KVM.
>
> Why?
The intent is not to add a scheduling policy hook, but to expose a single
observability point for KVM paths that already have wait/yield semantics.
Today userspace can infer parts of this from lower-level tracepoints, but
only by stitching together different sources:
- PLE can be inferred from VMX/SVM exit reasons via kvm_exit.
- KVM_HC_SCHED_YIELD can be inferred from kvm_hypercall.
- Hyper-V and Xen yield/spin-wait paths have their own hypercall paths.
- HLT may not be visible as a userspace exit when KVM handles the halt
internally, e.g. with in-kernel LAPIC.
Those tracepoints are useful, but they expose lower-level mechanisms rather
than the KVM-level point where the vCPU reaches an existing wait/yield
handling path. Consumers that want to correlate vCPU wait/yield behavior
with host scheduling activity currently need to understand VMX vs. SVM exit
encoding, native KVM hypercall arguments, Hyper-V/Xen hypercall semantics,
and HLT handling differences.
This tracepoint is meant to provide that common KVM-level signal:
- ple: KVM reached the PLE handling path
- hlt: KVM emulated a guest HLT
- pv_yield: KVM reached a paravirtual yield-style path
The tracepoint is informational only. It does not change KVM scheduling
behavior, directed-yield behavior, vCPU placement, or host scheduler state.
The target field is also only the guest-supplied value when one exists; it
does not imply that the target exists, is runnable, or that a directed yield
succeeded.
So the reason for adding a new tracepoint is to avoid requiring userspace
observability tools to reconstruct this higher-level wait/yield signal from
several unrelated low-level tracepoints and architecture-specific details.
Thanks,
ZhangHao
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths
2026-05-22 2:49 ` zhanghao
@ 2026-06-18 16:14 ` Sean Christopherson
0 siblings, 0 replies; 4+ messages in thread
From: Sean Christopherson @ 2026-06-18 16:14 UTC (permalink / raw)
To: zhanghao; +Cc: Paolo Bonzini, kvm
On Fri, May 22, 2026, zhanghao wrote:
> On Thu, May 21, 2026, Sean Christopherson wrote:
> > On Thu, May 21, 2026, zhanghao wrote:
> > > >From 0c8d4428390a1238a956f713c1ddced18eac83da Mon Sep 17 00:00:00 2001
> > > From: Hao Zhang <zhanghao1@kylinos.cn>
> > > Date: Thu, 21 May 2026 16:30:37 +0800
> > >
> > > Add a KVM x86 tracepoint that reports when a vCPU reaches selected
> > > wait/yield handling paths in KVM.
> >
> > Why?
>
> The intent is not to add a scheduling policy hook, but to expose a single
> observability point for KVM paths that already have wait/yield semantics.
>
> Today userspace can infer parts of this from lower-level tracepoints, but
> only by stitching together different sources:
>
> - PLE can be inferred from VMX/SVM exit reasons via kvm_exit.
> - KVM_HC_SCHED_YIELD can be inferred from kvm_hypercall.
> - Hyper-V and Xen yield/spin-wait paths have their own hypercall paths.
> - HLT may not be visible as a userspace exit when KVM handles the halt
> internally, e.g. with in-kernel LAPIC.
>
> Those tracepoints are useful, but they expose lower-level mechanisms rather
> than the KVM-level point where the vCPU reaches an existing wait/yield
> handling path. Consumers that want to correlate vCPU wait/yield behavior
> with host scheduling activity currently need to understand VMX vs. SVM exit
> encoding, native KVM hypercall arguments, Hyper-V/Xen hypercall semantics,
> and HLT handling differences.
>
> This tracepoint is meant to provide that common KVM-level signal:
>
> - ple: KVM reached the PLE handling path
> - hlt: KVM emulated a guest HLT
> - pv_yield: KVM reached a paravirtual yield-style path
>
> The tracepoint is informational only. It does not change KVM scheduling
> behavior, directed-yield behavior, vCPU placement, or host scheduler state.
> The target field is also only the guest-supplied value when one exists; it
> does not imply that the target exists, is runnable, or that a directed yield
> succeeded.
>
> So the reason for adding a new tracepoint is to avoid requiring userspace
> observability tools to reconstruct this higher-level wait/yield signal from
> several unrelated low-level tracepoints and architecture-specific details.
Tracepoints aren't ABI though, and "observability tools" suggests userspace wants
to build functionality on top of all of this. If this is for debug purposes,
then tools like bpftrace are a much better option.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-06-18 16:14 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-21 9:16 [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths zhanghao
2026-05-21 14:05 ` Sean Christopherson
2026-05-22 2:49 ` zhanghao
2026-06-18 16:14 ` Sean Christopherson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox