* [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths
@ 2026-05-21 9:16 zhanghao
2026-05-21 14:05 ` Sean Christopherson
0 siblings, 1 reply; 3+ messages in thread
From: zhanghao @ 2026-05-21 9:16 UTC (permalink / raw)
To: Sean Christopherson; +Cc: Paolo Bonzini, kvm
From 0c8d4428390a1238a956f713c1ddced18eac83da Mon Sep 17 00:00:00 2001
From: Hao Zhang <zhanghao1@kylinos.cn>
Date: Thu, 21 May 2026 16:30:37 +0800
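Add a KVM x86 tracepoint that reports when a vCPU reaches selected
wait/yield handling paths in KVM, so that tracing tools get one KVM-level
signal instead of having to piece it together from kvm_exit, kvm_hypercall,
and the separate Hyper-V and Xen hypercall paths.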
The tracepoint is informational only. It does not change KVM scheduling
behavior, vCPU placement, directed-yield policy, or host scheduler state.
It also does not report the outcome of scheduling decisions.
In particular, the target field is the guest-supplied value when one
exists. It does not imply that the target exists, is runnable, or that a
directed yield succeeded.
Report events for three kinds of paths: PAUSE-loop exiting (after hardware
has already delivered a PLE exit), guest HLT emulation, and paravirtual
yield-style paths (KVM_HC_SCHED_YIELD, Hyper-V long spin wait, and Xen
sched yield).
Use -1 as the target when there is no explicit guest-supplied target.
Signed-off-by: Hao Zhang <zhanghao1@kylinos.cn>
---
arch/x86/kvm/hyperv.c | 2 ++
arch/x86/kvm/svm/svm.c | 2 ++
arch/x86/kvm/trace.h | 46 ++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/vmx.c | 2 ++
arch/x86/kvm/x86.c | 6 ++++++
arch/x86/kvm/xen.c | 2 ++
6 files changed, 60 insertions(+)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4438ecac9a89..7f3b4698083f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2593,6 +2593,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PV_YIELD,
+ KVM_SCHED_INVALID_TARGET);
kvm_vcpu_on_spin(vcpu, true);
break;
case HVCALL_SIGNAL_EVENT:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e02a38da5296..a3390a051ab2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3199,6 +3199,8 @@ static int pause_interception(struct kvm_vcpu *vcpu)
grow_ple_window(vcpu);
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PLE,
+ KVM_SCHED_INVALID_TARGET);
kvm_vcpu_on_spin(vcpu, in_kernel);
return kvm_skip_emulated_instruction(vcpu);
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 0db25bba17f6..83b291de9665 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -11,6 +11,17 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
+#ifndef _KVM_SCHED_EVENT_TYPES
+#define _KVM_SCHED_EVENT_TYPES
+enum kvm_sched_event_type {
+ KVM_SCHED_EVT_PLE,
+ KVM_SCHED_EVT_HLT,
+ KVM_SCHED_EVT_PV_YIELD,
+};
+
+#define KVM_SCHED_INVALID_TARGET (-1LL)
+#endif
+
#ifdef CREATE_TRACE_POINTS
#define tracing_kvm_rip_read(vcpu) ({ \
typeof(vcpu) __vcpu = vcpu; \
@@ -1965,6 +1976,41 @@ TRACE_EVENT(kvm_rmp_fault,
__entry->error_code, __entry->rmp_level, __entry->psmash_ret)
);
+TRACE_DEFINE_ENUM(KVM_SCHED_EVT_PLE);
+TRACE_DEFINE_ENUM(KVM_SCHED_EVT_HLT);
+TRACE_DEFINE_ENUM(KVM_SCHED_EVT_PV_YIELD);
+
+/*
+ * Trace when a vCPU reaches selected wait/yield handling paths. This is
+ * informational only and does not report the outcome of scheduling decisions.
+ * @target is the guest supplied value when one exists, and does not imply that
+ * the target exists, is runnable, or that a directed yield succeeded.
+ */
+TRACE_EVENT(kvm_sched_event,
+ TP_PROTO(unsigned int vcpu_id, int event, s64 target),
+ TP_ARGS(vcpu_id, event, target),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id)
+ __field(int, event)
+ __field(s64, target)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->event = event;
+ __entry->target = target;
+ ),
+
+ TP_printk("vcpu %u event %s target %lld",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->event,
+ { KVM_SCHED_EVT_PLE, "ple" },
+ { KVM_SCHED_EVT_HLT, "hlt" },
+ { KVM_SCHED_EVT_PV_YIELD, "pv_yield" }),
+ __entry->target)
+);
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 49feecb286b2..d60026c64fc9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6104,6 +6104,8 @@ static int handle_pause(struct kvm_vcpu *vcpu)
* never set PAUSE_EXITING and just set PLE if supported,
* so the vcpu must be CPL=0 if it gets a PAUSE exit.
*/
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PLE,
+ KVM_SCHED_INVALID_TARGET);
kvm_vcpu_on_spin(vcpu, true);
return kvm_skip_emulated_instruction(vcpu);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a1b63c63d1a..09d1d8242017 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10482,6 +10482,7 @@ int ____kvm_emulate_hypercall(struct kvm_vcpu *vcpu, int cpl,
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
break;
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PV_YIELD, a0);
kvm_sched_yield(vcpu, a0);
ret = 0;
break;
@@ -11807,6 +11808,10 @@ static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
* handling wake events.
*/
++vcpu->stat.halt_exits;
+
+ if (reason == KVM_EXIT_HLT)
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_HLT,
+ KVM_SCHED_INVALID_TARGET);
if (lapic_in_kernel(vcpu)) {
if (kvm_vcpu_has_events(vcpu) || vcpu->arch.pv.pv_unhalted)
state = KVM_MP_STATE_RUNNABLE;
@@ -14553,6 +14558,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_rmp_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_sched_event);
static int __init kvm_x86_init(void)
{
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 91fd3673c09a..f7033f0e7e13 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1586,6 +1586,8 @@ static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
return true;
fallthrough;
case SCHEDOP_yield:
+ trace_kvm_sched_event(vcpu->vcpu_id, KVM_SCHED_EVT_PV_YIELD,
+ KVM_SCHED_INVALID_TARGET);
kvm_vcpu_on_spin(vcpu, true);
*r = 0;
return true;
base-commit: 8bc67e4db64aa72732c474b44ea8622062c903f0
--
2.25.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths
2026-05-21 9:16 [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths zhanghao
@ 2026-05-21 14:05 ` Sean Christopherson
2026-05-22 2:49 ` zhanghao
0 siblings, 1 reply; 3+ messages in thread
From: Sean Christopherson @ 2026-05-21 14:05 UTC (permalink / raw)
To: zhanghao; +Cc: Paolo Bonzini, kvm
On Thu, May 21, 2026, zhanghao wrote:
> >From 0c8d4428390a1238a956f713c1ddced18eac83da Mon Sep 17 00:00:00 2001
> From: Hao Zhang <zhanghao1@kylinos.cn>
> Date: Thu, 21 May 2026 16:30:37 +0800
>
> Add a KVM x86 tracepoint that reports when a vCPU reaches selected
> wait/yield handling paths in KVM.
Why?
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths
2026-05-21 14:05 ` Sean Christopherson
@ 2026-05-22 2:49 ` zhanghao
0 siblings, 0 replies; 3+ messages in thread
From: zhanghao @ 2026-05-22 2:49 UTC (permalink / raw)
To: Sean Christopherson; +Cc: Paolo Bonzini, kvm
On Thu, May 21, 2026, Sean Christopherson wrote:
> On Thu, May 21, 2026, zhanghao wrote:
> > >From 0c8d4428390a1238a956f713c1ddced18eac83da Mon Sep 17 00:00:00 2001
> > From: Hao Zhang <zhanghao1@kylinos.cn>
> > Date: Thu, 21 May 2026 16:30:37 +0800
> >
> > Add a KVM x86 tracepoint that reports when a vCPU reaches selected
> > wait/yield handling paths in KVM.
>
> Why?
The intent is not to add a scheduling policy hook, but to expose a single
observability point for KVM paths that already have wait/yield semantics.
Today userspace can infer parts of this from lower-level tracepoints, but
only by stitching together different sources:
- PLE can be inferred from VMX/SVM exit reasons via kvm_exit.
- KVM_HC_SCHED_YIELD can be inferred from kvm_hypercall.
- Hyper-V and Xen yield/spin-wait requests go through their own, separate
hypercall handlers.
- HLT may not be visible as a userspace exit when KVM handles the halt
internally, e.g. with in-kernel LAPIC.
Those tracepoints are useful, but they expose lower-level mechanisms rather
than the KVM-level point where the vCPU reaches an existing wait/yield
handling path. Consumers that want to correlate vCPU wait/yield behavior
with host scheduling activity currently need to understand VMX vs. SVM exit
encoding, native KVM hypercall arguments, Hyper-V/Xen hypercall semantics,
and HLT handling differences.
This tracepoint is meant to provide that common KVM-level signal:
- ple: KVM reached the PLE handling path
- hlt: KVM emulated a guest HLT
- pv_yield: KVM reached a paravirtual yield-style path
The tracepoint is informational only. It does not change KVM scheduling
behavior, directed-yield behavior, vCPU placement, or host scheduler state.
The target field is also only the guest-supplied value when one exists; it
does not imply that the target exists, is runnable, or that a directed yield
succeeded.
So the reason for adding a new tracepoint is to avoid requiring userspace
observability tools to reconstruct this higher-level wait/yield signal from
several unrelated low-level tracepoints and architecture-specific details.
Thanks,
ZhangHao
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-05-22 2:50 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-05-21 9:16 [PATCH] KVM: x86: Add tracepoint for vCPU wait/yield paths zhanghao
2026-05-21 14:05 ` Sean Christopherson
2026-05-22 2:49 ` zhanghao
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.